To begin with, we will load a few libraries that we will use for the analysis. Note the use of reticulate as we will be using functions from the sklearn python library.
library(readr)
library(magrittr)
library(ggplot2)
library(parallel)
library(ElPiGraph.R)
library(reshape)
library(Rtsne)
library(igraph)
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
library(DDRTree)
## Loading required package: irlba
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following object is masked from 'package:reshape':
##
## expand
library(irlba)
library(reticulate)
sk <- import("sklearn.manifold")
Now we will load the data, available from here. Data are already filtered and log-transformed, so we will not include any preprocessing.
Header <- read_delim(file = "Datasets/Nestorowa/nestorowa_corrected_log2_transformed_counts.txt.gz", col_names = FALSE, delim = " ", n_max = 1)
## Parsed with column specification:
## cols(
## .default = col_character()
## )
## See spec(...) for full column specifications.
ExpMat <- read_delim(file = "Datasets/Nestorowa/nestorowa_corrected_log2_transformed_counts.txt.gz", col_names = FALSE, delim = " ", skip = 1)
## Parsed with column specification:
## cols(
## .default = col_double(),
## X1 = col_character()
## )
## See spec(...) for full column specifications.
Genes <- unlist(Header)
Cells <- unlist(ExpMat$X1)
ExpMat <- data.matrix(ExpMat[,-1])
colnames(ExpMat) <- Genes
rownames(ExpMat) <- Cells
Then we load the additional information and perform a bit of data cleaning
ExtraInfo_Head <- read_delim("Datasets/Nestorowa/all_cell_types.txt", col_names = FALSE, delim = "\t", n_max = 1)
## Parsed with column specification:
## cols(
## .default = col_character()
## )
## See spec(...) for full column specifications.
ExtraInfo <- read_delim("Datasets/Nestorowa/all_cell_types.txt", col_names = FALSE, delim = "\t", skip = 1)
## Parsed with column specification:
## cols(
## .default = col_integer(),
## X1 = col_character()
## )
## See spec(...) for full column specifications.
ExtraInfo_Bin <- data.matrix(ExtraInfo[,-1])
rownames(ExtraInfo_Bin) <- unlist(ExtraInfo$X1)
colnames(ExtraInfo_Bin) <- unlist(ExtraInfo_Head)
PopID_broad <- apply(ExtraInfo_Bin[,grep("broad", colnames(ExtraInfo_Bin))], 1, function(x){
if(sum(x==1)>1 | sum(x==1)==0){
return(NA)
} else {
return(which(x==1))
}
})
PopID_broad <- colnames(ExtraInfo_Bin)[PopID_broad]
names(PopID_broad) <- unlist(ExtraInfo$X1)
names(PopID_broad) <- gsub(pattern = "-", replacement = ".", x = names(PopID_broad))
PopID_broad_Fil <- PopID_broad[rownames(ExpMat)]
table(PopID_broad_Fil, useNA = "if")
## PopID_broad_Fil
## CMP_broad GMP_broad LMPP_broad LTHSC_broad MEP_broad <NA>
## 317 120 246 267 354 341
As a first step we check PCA
PCA <- prcomp_irlba(ExpMat, n = 2, retx = TRUE)
PlotPCA <- data.frame(PCA$x, type = PopID_broad_Fil)
ggplot(PlotPCA, aes(x = PC1, y = PC2, col = type)) + geom_point(size = 1, alpha=.7) + labs(title = "PCA of Raw data")
and tSNE
tictoc::tic()
tSNE <- Rtsne(ExpMat, dims = 3, initial_dims = 50, perplexity = 30)
tictoc::toc()
## 71.874 sec elapsed
PlotTSE<- data.frame(D1 = tSNE$Y[,1], D2 = tSNE$Y[,2], D3 = tSNE$Y[,3], type = PopID_broad_Fil)
ggplot(PlotTSE, aes(x = D1, y = D2, col = type)) + geom_point(size = 1, alpha=.7) + labs(title = "tSNE of Raw data")
ggplot(PlotTSE, aes(x = D1, y = D3, col = type)) + geom_point(size = 1, alpha=.7) + labs(title = "tSNE of Raw data")
ggplot(PlotTSE, aes(x = D2, y = D3, col = type)) + geom_point(size = 1, alpha=.7) + labs(title = "tSNE of Raw data")
plotly::plot_ly(data = PlotTSE, x = ~D1, y = ~D2, z = ~D3, color = ~type, marker = list(size = 3))
## No trace type specified:
## Based on info supplied, a 'scatter3d' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter3d
## No scatter3d mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
and MLLE projection
tictoc::tic()
MLLE.Model <- sk$LocallyLinearEmbedding(n_neighbors=as.integer(ceiling(nrow(ExpMat)*.1)),
n_components = as.integer(3),
method = 'modified',
eigen_solver = 'dense',
n_jobs = as.integer(3),
random_state = as.integer(10),
neighbors_algorithm = 'kd_tree')
MLLE.Fit <- MLLE.Model$fit(ExpMat)
MLLE.Emb <- MLLE.Fit$embedding_
tictoc::toc()
## 47.199 sec elapsed
PlotMLE <- data.frame(D1 = MLLE.Emb[,1], D2 = MLLE.Emb[,2], D3 = MLLE.Emb[,3], type = PopID_broad_Fil)
ggplot(PlotMLE, aes(x = D1, y = D2, col = type)) + geom_point(size = 1, alpha=.7) + labs(title = "tSNE of data")
ggplot(PlotMLE, aes(x = D1, y = D3, col = type)) + geom_point(size = 1, alpha=.7) + labs(title = "tSNE of data")
ggplot(PlotMLE, aes(x = D2, y = D3, col = type)) + geom_point(size = 1, alpha=.7) + labs(title = "tSNE of data")
plotly::plot_ly(data = PlotMLE, x = ~D1, y = ~D2, z = ~D3, color = ~type, marker = list(size = 3))
## No trace type specified:
## Based on info supplied, a 'scatter3d' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter3d
## No scatter3d mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
and a knn graph
PCA_Proj <- irlba::prcomp_irlba(ExpMat, n = 50, retx = TRUE)
DistMatrix <- distutils::PartialDistance(PCA_Proj$x, PCA_Proj$x)
k = 4
DistNorm <- apply(DistMatrix, 1, function(x){
Thr <- sort(x)[k+1]
x[x>Thr] <- 0
return(x)
})
knn_Graph <- graph_from_adjacency_matrix(DistNorm, mode = "undirected", diag = FALSE, weighted = "dist")
# knn_Graph <- simplify(knn_Graph, remove.loops = TRUE)
Labs <- unique(PopID_broad_Fil)
VertCol <- rainbow(length(Labs))
names(VertCol) <- Labs
AllVertCol <- VertCol[PopID_broad_Fil]
AllVertCol[is.na(AllVertCol)] <- Labs[is.na(names(VertCol))]
plot(knn_Graph, vertex.size = 1, vertex.label = NA,
vertex.color = AllVertCol, vertex.frame.color = NA)
tictoc::tic()
FD_Layout_2 <- layout_with_fr(graph = knn_Graph, dim = 2,
weights = 10/E(knn_Graph)$dist, maxiter = 10000)
tictoc::toc()
## 3.302 sec elapsed
PlotFD2 <- data.frame(D1 = FD_Layout_2[,1], D2 = FD_Layout_2[,2], type = PopID_broad_Fil)
ggplot(PlotFD2, aes(x = D1, y = D2, col = type)) +
geom_point(size = 1, alpha=.7) + labs(title = "FD 2D proj")
tictoc::tic()
FD_Layout_3 <- layout_with_fr(graph = knn_Graph, dim = 3, weights = 10/E(knn_Graph)$dist, maxiter = 5000)
tictoc::toc()
## 46.553 sec elapsed
PlotFD3 <- data.frame(D1 = FD_Layout_3[,1], D2 = FD_Layout_3[,2], D3 = FD_Layout_3[,3], type = PopID_broad_Fil)
plotly::plot_ly(data = PlotFD3, x = ~D1, y = ~D2, z = ~D3, color = ~type,
marker = list(size = 3))
## No trace type specified:
## Based on info supplied, a 'scatter3d' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter3d
## No scatter3d mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
# ggplot(PlotFD2, aes(x = D1, y = D2, col = type)) +
# geom_point(size = 1, alpha=.7) + labs(title = "FD 3D proj")
# plot3d(FD_Layout_3, col = Classification_Short_Col)
# legend3d("topright", names(Classification_Short_Col_Scale), col = Classification_Short_Col_Scale, pch = 1)
and we can even try DDRTree
tictoc::tic()
DDRTrans <- DDRTree(X = t(PCA_Proj$x), dimensions = 2, verbose = FALSE)
tictoc::toc()
## 15.929 sec elapsed
DDR.DF <- data.frame(D1 = DDRTrans$Z[1,],
D2 = DDRTrans$Z[2,],
type = PopID_broad_Fil)
plotly::plot_ly(data = DDR.DF, x = ~D1, y = ~D2,
color = ~type, marker = list(size = 3))
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No scatter mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
tictoc::tic()
DDRTrans <- DDRTree(X = t(PCA_Proj$x), dimensions = 3, verbose = FALSE)
tictoc::toc()
## 15.907 sec elapsed
DDR.DF <- data.frame(D1 = DDRTrans$Z[1,],
D2 = DDRTrans$Z[2,],
D3 = DDRTrans$Z[3,],
type = PopID_broad_Fil)
plotly::plot_ly(data = DDR.DF, x = ~D1, y = ~D2, z = ~D3,
color = ~type, marker = list(size = 3))
## No trace type specified:
## Based on info supplied, a 'scatter3d' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter3d
## No scatter3d mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
tictoc::tic()
DDRTrans <- DDRTree(X = t(ExpMat), dimensions = 3, verbose = FALSE)
tictoc::toc()
## 232.595 sec elapsed
DDR.DF <- data.frame(D1 = DDRTrans$Z[1,],
D2 = DDRTrans$Z[2,],
D3 = DDRTrans$Z[3,],
type = PopID_broad_Fil)
plotly::plot_ly(data = DDR.DF, x = ~D1, y = ~D2, z = ~D3,
color = ~type, marker = list(size = 3))
## No trace type specified:
## Based on info supplied, a 'scatter3d' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter3d
## No scatter3d mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
Now, we’re ready to compute the tree that we will be using to explore the structure of the data. We will be using a trimming radius, so we need a genreal idea of the distance distribution in the data.
boxplot(dist(MLLE.Emb))
We can now fit a tree and optimize it.
Tree <- computeElasticPrincipalTree(X = MLLE.Emb, NumNodes = 10, Do_PCA = FALSE,
TrimmingRadius = .05,
ICOver = "Density", DensityRadius = .05,
ParallelRep = TRUE,
drawAccuracyComplexity = FALSE,
drawPCAView = TRUE,
drawEnergy = FALSE)
## [1] "Generating the initial configuration"
## [1] "Creating a line in the densest part of the data. DensityRadius needs to be specified!"
## [1] "Constructing tree 1 of 1 / Subset 1 of 1"
## [1] "The elastic matrix is being used. Edge configuration will be ignored"
## [1] "Computing EPG with 10 nodes on 1645 points and 3 dimensions"
## [1] "Using a single core"
## Nodes = 2 3 4 5 6 7 8 9
## BARCODE ENERGY NNODES NEDGES NRIBS NSTARS NRAYS NRAYS2 MSE MSEP FVE FVEP UE UR URN URN2 URSD
## 2||10 0.000252 10 9 4 2 0 0 0.0001864 Inf 0.8979 -Inf 6.151e-05 4.1e-06 4.1e-05 0.00041 0
## 0.721 sec elapsed
## [[1]]
Tree_Ext <- fineTuneBR(X = MLLE.Emb, NumNodes = 50, Do_PCA = FALSE,
TrimmingRadius = .05,
InitNodePositions = Tree[[1]]$NodePositions,
InitEdges = Tree[[1]]$Edges$Edges,
drawAccuracyComplexity = FALSE, drawPCAView = TRUE,
drawEnergy = FALSE)
## [1] "Constructing tree 1 of 1 / Subset 1 of 1"
## [1] "Computing EPG with 50 nodes on 1645 points and 3 dimensions"
## [1] "Using a single core"
## Nodes = 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49
## BARCODE ENERGY NNODES NEDGES NRIBS NSTARS NRAYS NRAYS2 MSE MSEP FVE FVEP UE UR URN URN2 URSD
## 2||50 0.0001201 50 49 44 2 0 0 9.236e-05 Inf 0.9494 -Inf 2.104e-05 6.671e-06 0.0003336 0.01668 0
## 2.23 sec elapsed
## [[1]]
We will also compute a set of optimized bootstrapped trees to explore the robustness of the derived structure.
tictoc::tic()
Tree_Boot <- computeElasticPrincipalTree(X = MLLE.Emb, NumNodes = 10, Do_PCA = FALSE,
TrimmingRadius = .05,
ICOver = "Density", DensityRadius = .05,
ProbPoint = .9, nReps = 100, n.cores = 4, ClusType = "FORK",
ParallelRep = TRUE,
drawAccuracyComplexity = FALSE,
drawPCAView = FALSE,
drawEnergy = FALSE)
## [1] "Creating a sock cluster with 4 nodes"
## [1] "Using parallel sampling analysis. Limited output available"
## [1] "Generating the initial configurations"
## [1] "Exporting initial configuration parameters"
## [1] "Exporting initial configuration parameters"
## [1] "Graphical output will be suppressed"
## [1] "Exporting parameters"
## [1] "Analysis is running ... no output will be shown"
## [1] "Sit back and relax, this may take a long time ..."
## 10.767 sec elapsed
## [1] "Constructing average tree"
## [1] "Creating a line in the densest part of the data. DensityRadius needs to be specified!"
## [1] "Using a user supplied cluster. Updating the value of X"
## [1] "Computing EPG with 10 nodes on 1000 points and 3 dimensions"
## [1] "Using a user supplied cluster. It must contains the data points in a matrix X"
## [1] "Exporting the additional variables to the cluster"
## Nodes = 2 3 4 5 6 7 8 9
## BARCODE ENERGY NNODES NEDGES NRIBS NSTARS NRAYS NRAYS2 MSE MSEP FVE FVEP UE UR URN URN2 URSD
## 2||10 7.778e-05 10 9 4 2 0 0 2.176e-05 9.918e-06 0.9868 0.994 5.504e-05 9.833e-07 9.833e-06 9.833e-05 0
## 0.37 sec elapsed
tictoc::toc()
## 18.517 sec elapsed
cl <- makeForkCluster(4)
tictoc::tic()
Tree_Ext_Boot <- parLapply(cl, Tree_Boot, function(x){
fineTuneBR(X = MLLE.Emb, NumNodes = 50, Do_PCA = FALSE,
TrimmingRadius = .05, ProbPoint = .9,
InitNodePositions = x$NodePositions,
InitEdges = x$Edges$Edges,
drawAccuracyComplexity = FALSE, drawPCAView = FALSE,
drawEnergy = FALSE)[[1]]
})
tictoc::toc()
## 84.982 sec elapsed
stopCluster(cl)
By plotting the data we can make sure that our anlysis is reasonable.
PlotPG(MLLE.Emb, TargetPG = Tree_Ext[[1]], Tree_Ext_Boot[1:(length(Tree_Ext_Boot)-1)],
GroupsLab = PopID_broad_Fil, DimToPlot = 1:3, LabMult = 4)
## [[1]]
##
## [[2]]
##
## [[3]]
We also extend the leaves to increase the definition of the pseudotime at the “border” of the data.
Extended <- ExtendLeaves(X = MLLE.Emb, TargetPG = Tree_Ext[[1]], Mode = "QuantCentroid", ControlPar = .9, TrimmingRadius = .025)
## [1] "4 points selected to compute the centroid while extending node 1"
## [1] "2 points selected to compute the centroid while extending node 3"
## [1] "3 points selected to compute the centroid while extending node 4"
## [1] "3 points selected to compute the centroid while extending node 6"
## [[1]]
PlotPG(MLLE.Emb, Extended, GroupsLab = PopID_broad_Fil, DimToPlot = 1:3,
NodeLabels = 1:nrow(Extended$NodePositions), LabMult = 4)
## [[1]]
##
## [[2]]
##
## [[3]]
We can also look at gene expression over the path
p <- PlotPG(MLLE.Emb, Extended, GroupsLab = ExpMat[, "Cd48"], DimToPlot = 1:3,
NodeLabels = 1:nrow(Extended$NodePositions), LabMult = 4)
print(p)
## [[1]]
##
## [[2]]
##
## [[3]]
the functions returns list of a ggplot objects
p[[1]] + scale_color_gradient(low = "blue", high = "red")
p[[2]] + scale_color_gradient(low = "blue", high = "red")
p[[3]] + scale_color_gradient(low = "blue", high = "red")
gg <- plotly::plotly_build(p[[1]] + scale_color_gradient(low = "blue", high = "red"))
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
gg$x$data[[1]]$text <- paste("Cell:", rownames(ExpMat), "<br>",
"Value:", ExpMat[, "Cd48"])
gg
To obtain the pseudotime we first need to get the relevant paths. We will use node 52 as root since LTHSC are predominantly associated with that branch.
Net <- ConstructGraph(PrintGraph = Extended)
Part <- PartitionData(X = MLLE.Emb, NodePositions = Extended$NodePositions)
PrjStruct <- project_point_onto_graph(X = MLLE.Emb,
NodePositions = Extended$NodePositions,
Edges = Extended$Edges$Edges,
Partition = Part$Partition)
Paths <- GetSubGraph(Net, "end2end")
# Leaves <- which(degree(Net)==1)
#
# which(Part$Partition %in% Leaves)
Paths_Norm <- lapply(Paths, function(x){
if(x[length(x)] == "52"){
return(rev(x))
}
if(x[1] == "52"){
return(x)
}
})
Paths_Norm <- Paths_Norm[sapply(Paths_Norm, function(x){!any(is.null(x))})]
Top get a better idea of how cells are associated with the different branches, we can also check the cells associted with the various branch
Branches <- GetSubGraph(Net, "branches")
TB <- sapply(Paths_Norm, function(x){
sapply(Branches, function(y) {
all(as.integer(y) %in% as.integer(x))
})
})
1*TB
## Path_1 Path_4 Path_5
## Branch_1 1 1 0
## Branch_2 1 0 0
## Branch_3 0 1 0
## Branch_4 1 1 1
## Branch_5 0 0 1
we can also look at how different cells are associted to different branches
BrPT <- which(degree(Net)>2)
Branches <- lapply(Branches, function(x){
setdiff(as.integer(names(x)), BrPT)
})
CellsOnBrs <- lapply(Branches, function(x){
which(Part$Partition %in% x)
})
Table <- sapply(CellsOnBrs, function(x){
table(factor(PopID_broad_Fil[x], levels = unique(PopID_broad_Fil)))
})
Table
## Branch_1 Branch_2 Branch_3 Branch_4 Branch_5
## LTHSC_broad 11 227 10 1 18
## LMPP_broad 18 201 17 1 0
## GMP_broad 43 10 54 9 1
## MEP_broad 27 4 1 77 238
## CMP_broad 86 68 65 79 7
This information can be used to give a name the differnet branches
names(Paths_Norm) <- c("LTHSC->CMP", "LTHSC->CMP/GMP", "LTHSC->CMP->MEP")
We are now able to get notable genes
SmoothGenes <- CompareOnBranches(X = ExpMat, Paths = Paths_Norm, TargetPG = Extended, GroupsLab = PopID_broad_Fil, n.cores = 4, ClusType = "FORK",
Partition = Part$Partition, PrjStr = PrjStruct, Span = .2, Mode = "KW", ReturnGenes = TRUE)
## [1] "Feature selection by Kruskal-Wallis rank sum test on differential branches"
boxplot(SmoothGenes, ylab = "Kruskal-Wallis pv", log='y')
and plot them
CompareOnBranches(X = ExpMat, Paths = Paths_Norm, TargetPG = Extended, GroupsLab = PopID_broad_Fil,
Partition = Part$Partition, PrjStr = PrjStruct, Span = .3, alpha = .05,
Features = names(SmoothGenes)[order(SmoothGenes, decreasing = FALSE)[1:27]], ScalePT = TRUE)
## [1] "3 pages will be plotted"
## [[1]]
##
## [[2]]
##
## [[3]]
CompareOnBranches(X = ExpMat, Paths = Paths_Norm, TargetPG = Extended, GroupsLab = PopID_broad_Fil,
Partition = Part$Partition, PrjStr = PrjStruct, Span = .3, alpha = .05, TrajCol = TRUE,
Features = names(SmoothGenes)[order(SmoothGenes, decreasing = FALSE)[1:27]], ScalePT = TRUE)
## [1] "3 pages will be plotted"
## [[1]]
##
## [[2]]
##
## [[3]]
CompareOnBranches(X = ExpMat, Paths = Paths_Norm, TargetPG = Extended, GroupsLab = PopID_broad_Fil,
n.cores = 4, ClusType = "FORK", alpha = .05,
Partition = Part$Partition, PrjStr = PrjStruct, Span = .2,
Mode = "MI", ModePar = "min", Features = 27,
ReturnGenes = FALSE, ScalePT = TRUE)
## [1] "Feature selection by mutual information"
## [1] "3 pages will be plotted"
## [[1]]
##
## [[2]]
##
## [[3]]
We can also further explore the the differences between path 1 and 2
CompareOnBranches(X = ExpMat, Paths = Paths_Norm[c(1,2)], TargetPG = Extended, GroupsLab = PopID_broad_Fil,
n.cores = 4, ClusType = "FORK", TrajCol = TRUE,
Partition = Part$Partition, PrjStr = PrjStruct, Span = .2, Mode = "KW", ScalePT = TRUE, Features = 27)
## [1] "Feature selection by Kruskal-Wallis rank sum test on differential branches"
## [1] "3 pages will be plotted"
## [[1]]
##
## [[2]]
##
## [[3]]
CompareOnBranches(X = ExpMat, Paths = Paths_Norm, TargetPG = Extended,
BootPG = Tree_Ext_Boot[1:(length(Tree_Ext_Boot)-1)],
GroupsLab = PopID_broad_Fil,
Partition = Part$Partition, PrjStr = PrjStruct, Span = .3, alpha = .05,
Features = names(SmoothGenes)[order(SmoothGenes, decreasing = FALSE)[1:9]], ScalePT = TRUE)
## [1] "1 pages will be plotted"
## Warning: Removed 9 rows containing missing values (geom_rect).
which(degree(Net)>2)
## 2 5
## 2 5
as.integer(neighborhood(Net, 1, 1)[[1]])
## [1] 1 7 51
OnBr <- Part$Partition %in% as.integer(neighborhood(Net, 1, 5)[[1]])
sum(OnBr)
## [1] 95
GenesPV <- apply(ExpMat, 2, function(x) {
wilcox.test(x[OnBr], x[!OnBr], alternative = "two.sided")$p.value
})
names(sort(GenesPV)[1:10])
## [1] "Apoe" "S100a10" "Mfsd2b" "Gata2" "Clec4d" "Vamp5" "Cdk6"
## [8] "St8sia6" "Scin" "Hdac7"
CompareOnBranches(X = ExpMat, Paths = Paths_Norm, TargetPG = Extended,
BootPG = Tree_Ext_Boot[1:(length(Tree_Ext_Boot)-1)],
GroupsLab = PopID_broad_Fil,
Partition = Part$Partition, PrjStr = PrjStruct,
Span = .3, alpha = .05,
Features = names(sort(GenesPV)[1:9]), ScalePT = TRUE)
## [1] "1 pages will be plotted"
## Warning: Removed 9 rows containing missing values (geom_rect).
NodesOnPath <- lapply(Paths_Norm, function(x) {
if(any(as.integer(x)==5)){
return(as.integer(x))
}
})
OnBr <- Part$Partition %in% as.integer(neighborhood(Net, 1, 5)[[1]])
sum(OnBr)
## [1] 95
OffBr <- Part$Partition %in% unlist(NodesOnPath)
OffBr <- OffBr & !OnBr
GenesPV <- apply(ExpMat, 2, function(x) {
wilcox.test(x[OnBr], x[OffBr], alternative = "two")$p.value
})
CompareOnBranches(X = ExpMat, Paths = Paths_Norm[c(1,3)], TargetPG = Extended,
BootPG = Tree_Ext_Boot[1:(length(Tree_Ext_Boot)-1)],
GroupsLab = PopID_broad_Fil, PlotOrg = TRUE,
Partition = Part$Partition, PrjStr = PrjStruct,
Span = .3, alpha = .05,
Features = names(sort(GenesPV)[1:9]), ScalePT = TRUE)
## [1] "1 pages will be plotted"