Setup

To begin with, we will load a few libraries that we will use for the analysis. Note the use of reticulate as we will be using functions from the sklearn python library.

library(readr)
library(magrittr)
library(ggplot2)
library(parallel)

library(ElPiGraph.R)

library(reshape)
library(Rtsne)
library(igraph)

## 
## Attaching package: 'igraph'

## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum

## The following object is masked from 'package:base':
## 
##     union

library(DDRTree)

## Loading required package: irlba

## Loading required package: Matrix

## 
## Attaching package: 'Matrix'

## The following object is masked from 'package:reshape':
## 
##     expand

library(irlba)

library(reticulate)
sk <- import("sklearn.manifold")

Now we will load the data, available from here. Data are already filtered and log-transformed, so we will not include any preprocessing.

Header <- read_delim(file = "Datasets/Nestorowa/nestorowa_corrected_log2_transformed_counts.txt.gz", col_names = FALSE, delim = " ", n_max = 1)

## Parsed with column specification:
## cols(
##   .default = col_character()
## )

## See spec(...) for full column specifications.

ExpMat <- read_delim(file = "Datasets/Nestorowa/nestorowa_corrected_log2_transformed_counts.txt.gz", col_names = FALSE, delim = " ", skip = 1)

## Parsed with column specification:
## cols(
##   .default = col_double(),
##   X1 = col_character()
## )
## See spec(...) for full column specifications.

Genes <- unlist(Header)
Cells <- unlist(ExpMat$X1)

ExpMat <- data.matrix(ExpMat[,-1])
colnames(ExpMat) <- Genes
rownames(ExpMat) <- Cells

Then we load the additional information and perform a bit of data cleaning

ExtraInfo_Head <- read_delim("Datasets/Nestorowa/all_cell_types.txt", col_names = FALSE, delim = "\t", n_max = 1)

## Parsed with column specification:
## cols(
##   .default = col_character()
## )

## See spec(...) for full column specifications.

ExtraInfo <- read_delim("Datasets/Nestorowa/all_cell_types.txt", col_names = FALSE, delim = "\t", skip = 1)

## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   X1 = col_character()
## )
## See spec(...) for full column specifications.

ExtraInfo_Bin <- data.matrix(ExtraInfo[,-1])
rownames(ExtraInfo_Bin) <- unlist(ExtraInfo$X1)
colnames(ExtraInfo_Bin) <- unlist(ExtraInfo_Head)

PopID_broad <- apply(ExtraInfo_Bin[,grep("broad", colnames(ExtraInfo_Bin))], 1, function(x){
  if(sum(x==1)>1 | sum(x==1)==0){
    return(NA)
  } else {
    return(which(x==1))
  }
})

PopID_broad <- colnames(ExtraInfo_Bin)[PopID_broad]
names(PopID_broad) <- unlist(ExtraInfo$X1)
names(PopID_broad) <- gsub(pattern = "-", replacement = ".", x = names(PopID_broad))

PopID_broad_Fil <- PopID_broad[rownames(ExpMat)]

table(PopID_broad_Fil, useNA = "if")

## PopID_broad_Fil
##   CMP_broad   GMP_broad  LMPP_broad LTHSC_broad   MEP_broad        <NA> 
##         317         120         246         267         354         341

Look at data

As a first step we check PCA

PCA <- prcomp_irlba(ExpMat, n = 2, retx = TRUE)

PlotPCA <- data.frame(PCA$x, type = PopID_broad_Fil)

ggplot(PlotPCA, aes(x = PC1, y = PC2, col = type)) + geom_point(size = 1, alpha=.7) + labs(title = "PCA of Raw data")

and tSNE

tictoc::tic()
tSNE <- Rtsne(ExpMat, dims = 3, initial_dims = 50, perplexity = 30)
tictoc::toc()

## 71.874 sec elapsed

PlotTSE<- data.frame(D1 = tSNE$Y[,1], D2 = tSNE$Y[,2], D3 = tSNE$Y[,3], type = PopID_broad_Fil)

ggplot(PlotTSE, aes(x = D1, y = D2, col = type)) + geom_point(size = 1, alpha=.7) + labs(title = "tSNE of Raw data")

ggplot(PlotTSE, aes(x = D1, y = D3, col = type)) + geom_point(size = 1, alpha=.7) + labs(title = "tSNE of Raw data")

ggplot(PlotTSE, aes(x = D2, y = D3, col = type)) + geom_point(size = 1, alpha=.7) + labs(title = "tSNE of Raw data")

plotly::plot_ly(data = PlotTSE, x = ~D1, y = ~D2, z = ~D3, color = ~type, marker = list(size = 3))

## No trace type specified:
##   Based on info supplied, a 'scatter3d' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter3d

## No scatter3d mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode

and MLLE projection

tictoc::tic()
MLLE.Model <- sk$LocallyLinearEmbedding(n_neighbors=as.integer(ceiling(nrow(ExpMat)*.1)),
                                  n_components = as.integer(3),
                                  method = 'modified',
                                  eigen_solver = 'dense',
                                  n_jobs = as.integer(3),
                                  random_state = as.integer(10),
                                  neighbors_algorithm = 'kd_tree')

MLLE.Fit <- MLLE.Model$fit(ExpMat)
MLLE.Emb <- MLLE.Fit$embedding_
tictoc::toc()

## 47.199 sec elapsed

PlotMLE <- data.frame(D1 = MLLE.Emb[,1], D2 = MLLE.Emb[,2], D3 = MLLE.Emb[,3], type = PopID_broad_Fil)

ggplot(PlotMLE, aes(x = D1, y = D2, col = type)) + geom_point(size = 1, alpha=.7) + labs(title = "tSNE of data")

ggplot(PlotMLE, aes(x = D1, y = D3, col = type)) + geom_point(size = 1, alpha=.7) + labs(title = "tSNE of data")

ggplot(PlotMLE, aes(x = D2, y = D3, col = type)) + geom_point(size = 1, alpha=.7) + labs(title = "tSNE of data")

plotly::plot_ly(data = PlotMLE, x = ~D1, y = ~D2, z = ~D3, color = ~type, marker = list(size = 3))

## No trace type specified:
##   Based on info supplied, a 'scatter3d' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter3d

## No scatter3d mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode

and a knn graph

PCA_Proj <- irlba::prcomp_irlba(ExpMat, n = 50, retx = TRUE)

DistMatrix <- distutils::PartialDistance(PCA_Proj$x, PCA_Proj$x)

k = 4

DistNorm <- apply(DistMatrix, 1, function(x){
  Thr <- sort(x)[k+1]
  x[x>Thr] <- 0
  return(x)
})


knn_Graph <- graph_from_adjacency_matrix(DistNorm, mode = "undirected", diag = FALSE, weighted = "dist")
# knn_Graph <- simplify(knn_Graph, remove.loops = TRUE)

Labs <- unique(PopID_broad_Fil)
VertCol <- rainbow(length(Labs))
names(VertCol) <- Labs

AllVertCol <- VertCol[PopID_broad_Fil]
AllVertCol[is.na(AllVertCol)] <- Labs[is.na(names(VertCol))]

plot(knn_Graph, vertex.size = 1, vertex.label = NA,
     vertex.color = AllVertCol, vertex.frame.color = NA)

tictoc::tic()
FD_Layout_2 <- layout_with_fr(graph = knn_Graph, dim = 2,
                              weights = 10/E(knn_Graph)$dist, maxiter = 10000)
tictoc::toc()

## 3.302 sec elapsed

PlotFD2 <- data.frame(D1 = FD_Layout_2[,1], D2 = FD_Layout_2[,2], type = PopID_broad_Fil)

ggplot(PlotFD2, aes(x = D1, y = D2, col = type)) +
  geom_point(size = 1, alpha=.7) + labs(title = "FD 2D proj")

tictoc::tic()
FD_Layout_3 <- layout_with_fr(graph = knn_Graph, dim = 3, weights = 10/E(knn_Graph)$dist, maxiter = 5000)
tictoc::toc()

## 46.553 sec elapsed

PlotFD3 <- data.frame(D1 = FD_Layout_3[,1], D2 = FD_Layout_3[,2], D3 = FD_Layout_3[,3], type = PopID_broad_Fil)

plotly::plot_ly(data = PlotFD3, x = ~D1, y = ~D2, z = ~D3, color = ~type,
                marker = list(size = 3))

## No trace type specified:
##   Based on info supplied, a 'scatter3d' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter3d

## No scatter3d mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode

# ggplot(PlotFD2, aes(x = D1, y = D2, col = type)) +
#   geom_point(size = 1, alpha=.7) + labs(title = "FD 3D proj")

# plot3d(FD_Layout_3, col = Classification_Short_Col)
# legend3d("topright", names(Classification_Short_Col_Scale), col = Classification_Short_Col_Scale, pch = 1)

and we can even try DDRTree

tictoc::tic()
DDRTrans <- DDRTree(X = t(PCA_Proj$x), dimensions = 2, verbose = FALSE)
tictoc::toc()

## 15.929 sec elapsed

DDR.DF <- data.frame(D1 = DDRTrans$Z[1,],
                     D2 = DDRTrans$Z[2,],
                     type = PopID_broad_Fil)

plotly::plot_ly(data = DDR.DF, x = ~D1, y = ~D2,
                color = ~type, marker = list(size = 3))

## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter

## No scatter mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode

tictoc::tic()
DDRTrans <- DDRTree(X = t(PCA_Proj$x), dimensions = 3, verbose = FALSE)
tictoc::toc()

## 15.907 sec elapsed

DDR.DF <- data.frame(D1 = DDRTrans$Z[1,],
                     D2 = DDRTrans$Z[2,],
                     D3 = DDRTrans$Z[3,],
                     type = PopID_broad_Fil)

plotly::plot_ly(data = DDR.DF, x = ~D1, y = ~D2, z = ~D3,
                color = ~type, marker = list(size = 3))

## No trace type specified:
##   Based on info supplied, a 'scatter3d' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter3d

## No scatter3d mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode

tictoc::tic()
DDRTrans <- DDRTree(X = t(ExpMat), dimensions = 3, verbose = FALSE)
tictoc::toc()

## 232.595 sec elapsed

DDR.DF <- data.frame(D1 = DDRTrans$Z[1,],
                     D2 = DDRTrans$Z[2,],
                     D3 = DDRTrans$Z[3,],
                     type = PopID_broad_Fil)

plotly::plot_ly(data = DDR.DF, x = ~D1, y = ~D2, z = ~D3,
                color = ~type, marker = list(size = 3))

## No trace type specified:
##   Based on info supplied, a 'scatter3d' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter3d

## No scatter3d mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode

Fitting a tree

Now, we’re ready to compute the tree that we will be using to explore the structure of the data. We will be using a trimming radius, so we need a genreal idea of the distance distribution in the data.

boxplot(dist(MLLE.Emb))

We can now fit a tree and optimize it.

Tree <- computeElasticPrincipalTree(X = MLLE.Emb, NumNodes = 10, Do_PCA = FALSE,
                                    TrimmingRadius = .05,
                                    ICOver = "Density", DensityRadius = .05,
                                    ParallelRep = TRUE,
                                    drawAccuracyComplexity = FALSE,
                                    drawPCAView = TRUE,
                                    drawEnergy = FALSE)

## [1] "Generating the initial configuration"
## [1] "Creating a line in the densest part of the data. DensityRadius needs to be specified!"
## [1] "Constructing tree 1 of 1 / Subset 1 of 1"
## [1] "The elastic matrix is being used. Edge configuration will be ignored"
## [1] "Computing EPG with 10 nodes on 1645 points and 3 dimensions"
## [1] "Using a single core"
## Nodes = 2 3 4 5 6 7 8 9 
## BARCODE  ENERGY  NNODES  NEDGES  NRIBS   NSTARS  NRAYS   NRAYS2  MSE MSEP    FVE FVEP    UE  UR  URN URN2    URSD
## 2||10    0.000252    10  9   4   2   0   0   0.0001864   Inf 0.8979  -Inf    6.151e-05   4.1e-06 4.1e-05 0.00041 0
## 0.721 sec elapsed
## [[1]]

Tree_Ext <- fineTuneBR(X = MLLE.Emb, NumNodes = 50, Do_PCA = FALSE, 
             TrimmingRadius = .05,
             InitNodePositions = Tree[[1]]$NodePositions,
             InitEdges = Tree[[1]]$Edges$Edges,
             drawAccuracyComplexity = FALSE, drawPCAView = TRUE,
             drawEnergy = FALSE)

## [1] "Constructing tree 1 of 1 / Subset 1 of 1"
## [1] "Computing EPG with 50 nodes on 1645 points and 3 dimensions"
## [1] "Using a single core"
## Nodes = 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 
## BARCODE  ENERGY  NNODES  NEDGES  NRIBS   NSTARS  NRAYS   NRAYS2  MSE MSEP    FVE FVEP    UE  UR  URN URN2    URSD
## 2||50    0.0001201   50  49  44  2   0   0   9.236e-05   Inf 0.9494  -Inf    2.104e-05   6.671e-06   0.0003336   0.01668 0
## 2.23 sec elapsed
## [[1]]

We will also compute a set of optimized bootstrapped trees to explore the robustness of the derived structure.

tictoc::tic()
Tree_Boot <- computeElasticPrincipalTree(X = MLLE.Emb, NumNodes = 10, Do_PCA = FALSE,
                                    TrimmingRadius = .05,
                                    ICOver = "Density", DensityRadius = .05,
                                    ProbPoint = .9, nReps = 100, n.cores = 4, ClusType = "FORK",
                                    ParallelRep = TRUE,
                                    drawAccuracyComplexity = FALSE,
                                    drawPCAView = FALSE,
                                    drawEnergy = FALSE)

## [1] "Creating a sock cluster with 4 nodes"
## [1] "Using parallel sampling analysis. Limited output available"
## [1] "Generating the initial configurations"
## [1] "Exporting initial configuration parameters"
## [1] "Exporting initial configuration parameters"
## [1] "Graphical output will be suppressed"
## [1] "Exporting parameters"
## [1] "Analysis is running ... no output will be shown"
## [1] "Sit back and relax, this may take a long time ..."
## 10.767 sec elapsed
## [1] "Constructing average tree"
## [1] "Creating a line in the densest part of the data. DensityRadius needs to be specified!"
## [1] "Using a user supplied cluster. Updating the value of X"
## [1] "Computing EPG with 10 nodes on 1000 points and 3 dimensions"
## [1] "Using a user supplied cluster. It must contains the data points in a matrix X"
## [1] "Exporting the additional variables to the cluster"
## Nodes = 2 3 4 5 6 7 8 9 
## BARCODE  ENERGY  NNODES  NEDGES  NRIBS   NSTARS  NRAYS   NRAYS2  MSE MSEP    FVE FVEP    UE  UR  URN URN2    URSD
## 2||10    7.778e-05   10  9   4   2   0   0   2.176e-05   9.918e-06   0.9868  0.994   5.504e-05   9.833e-07   9.833e-06   9.833e-05   0
## 0.37 sec elapsed

tictoc::toc()

## 18.517 sec elapsed

cl <- makeForkCluster(4)

tictoc::tic()
Tree_Ext_Boot <- parLapply(cl, Tree_Boot, function(x){
  fineTuneBR(X = MLLE.Emb, NumNodes = 50, Do_PCA = FALSE, 
             TrimmingRadius = .05, ProbPoint = .9,
             InitNodePositions = x$NodePositions,
             InitEdges = x$Edges$Edges,
             drawAccuracyComplexity = FALSE, drawPCAView = FALSE,
             drawEnergy = FALSE)[[1]]
})
tictoc::toc()

## 84.982 sec elapsed

stopCluster(cl)

By plotting the data we can make sure that our anlysis is reasonable.

PlotPG(MLLE.Emb, TargetPG = Tree_Ext[[1]], Tree_Ext_Boot[1:(length(Tree_Ext_Boot)-1)],
       GroupsLab = PopID_broad_Fil, DimToPlot = 1:3, LabMult = 4)

## [[1]]

## 
## [[2]]

## 
## [[3]]

We also extend the leaves to increase the definition of the pseudotime at the “border” of the data.

Extended <- ExtendLeaves(X = MLLE.Emb, TargetPG = Tree_Ext[[1]], Mode = "QuantCentroid", ControlPar = .9, TrimmingRadius = .025)

## [1] "4 points selected to compute the centroid while extending node 1"
## [1] "2 points selected to compute the centroid while extending node 3"
## [1] "3 points selected to compute the centroid while extending node 4"
## [1] "3 points selected to compute the centroid while extending node 6"
## [[1]]

PlotPG(MLLE.Emb, Extended, GroupsLab = PopID_broad_Fil, DimToPlot = 1:3,
       NodeLabels = 1:nrow(Extended$NodePositions), LabMult = 4)

## [[1]]

## 
## [[2]]

## 
## [[3]]

We can also look at gene expression over the path

p <- PlotPG(MLLE.Emb, Extended, GroupsLab = ExpMat[, "Cd48"], DimToPlot = 1:3,
       NodeLabels = 1:nrow(Extended$NodePositions), LabMult = 4)

print(p)

## [[1]]

## 
## [[2]]

## 
## [[3]]

the functions returns list of a ggplot objects

p[[1]] + scale_color_gradient(low = "blue", high = "red")

p[[2]] + scale_color_gradient(low = "blue", high = "red")

p[[3]] + scale_color_gradient(low = "blue", high = "red")

gg <- plotly::plotly_build(p[[1]] + scale_color_gradient(low = "blue", high = "red"))

## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`

gg$x$data[[1]]$text <- paste("Cell:", rownames(ExpMat), "<br>",
                           "Value:", ExpMat[, "Cd48"])

gg

Pseudotime

To obtain the pseudotime we first need to get the relevant paths. We will use node 52 as root since LTHSC are predominantly associated with that branch.

Net <- ConstructGraph(PrintGraph = Extended)
Part <- PartitionData(X = MLLE.Emb, NodePositions = Extended$NodePositions)
PrjStruct <- project_point_onto_graph(X = MLLE.Emb,
                                      NodePositions = Extended$NodePositions,
                                      Edges = Extended$Edges$Edges,
                                      Partition = Part$Partition)

Paths <- GetSubGraph(Net, "end2end")


# Leaves <- which(degree(Net)==1)
# 
# which(Part$Partition %in% Leaves)

Paths_Norm <- lapply(Paths, function(x){
  if(x[length(x)] == "52"){
    return(rev(x))
  }
  
  if(x[1] == "52"){
    return(x)
  }
})

Paths_Norm <- Paths_Norm[sapply(Paths_Norm, function(x){!any(is.null(x))})]

Top get a better idea of how cells are associated with the different branches, we can also check the cells associted with the various branch

Branches <- GetSubGraph(Net, "branches")

TB <- sapply(Paths_Norm, function(x){
  sapply(Branches, function(y) {
    all(as.integer(y) %in% as.integer(x))
  })
})

1*TB

##          Path_1 Path_4 Path_5
## Branch_1      1      1      0
## Branch_2      1      0      0
## Branch_3      0      1      0
## Branch_4      1      1      1
## Branch_5      0      0      1

we can also look at how different cells are associted to different branches

BrPT <- which(degree(Net)>2)

Branches <- lapply(Branches, function(x){
  setdiff(as.integer(names(x)), BrPT)
})

CellsOnBrs <- lapply(Branches, function(x){
  which(Part$Partition %in% x)
})

Table <- sapply(CellsOnBrs, function(x){
  table(factor(PopID_broad_Fil[x], levels = unique(PopID_broad_Fil)))
})

Table

##             Branch_1 Branch_2 Branch_3 Branch_4 Branch_5
## LTHSC_broad       11      227       10        1       18
## LMPP_broad        18      201       17        1        0
## GMP_broad         43       10       54        9        1
## MEP_broad         27        4        1       77      238
## CMP_broad         86       68       65       79        7

This information can be used to give a name the differnet branches

names(Paths_Norm) <- c("LTHSC->CMP", "LTHSC->CMP/GMP", "LTHSC->CMP->MEP")

We are now able to get notable genes

SmoothGenes <- CompareOnBranches(X = ExpMat, Paths = Paths_Norm, TargetPG = Extended, GroupsLab = PopID_broad_Fil, n.cores = 4, ClusType = "FORK",
                  Partition = Part$Partition, PrjStr = PrjStruct, Span = .2, Mode = "KW", ReturnGenes = TRUE)

## [1] "Feature selection by Kruskal-Wallis rank sum test on differential branches"

boxplot(SmoothGenes, ylab = "Kruskal-Wallis pv", log='y')

and plot them

CompareOnBranches(X = ExpMat, Paths = Paths_Norm, TargetPG = Extended, GroupsLab = PopID_broad_Fil,
                  Partition = Part$Partition, PrjStr = PrjStruct, Span = .3, alpha = .05,
                  Features = names(SmoothGenes)[order(SmoothGenes, decreasing = FALSE)[1:27]], ScalePT = TRUE)

## [1] "3 pages will be plotted"

## [[1]]

## 
## [[2]]

## 
## [[3]]

CompareOnBranches(X = ExpMat, Paths = Paths_Norm, TargetPG = Extended, GroupsLab = PopID_broad_Fil,
                  Partition = Part$Partition, PrjStr = PrjStruct, Span = .3, alpha = .05, TrajCol = TRUE,
                  Features = names(SmoothGenes)[order(SmoothGenes, decreasing = FALSE)[1:27]], ScalePT = TRUE)

## [1] "3 pages will be plotted"

## [[1]]

## 
## [[2]]

## 
## [[3]]

CompareOnBranches(X = ExpMat, Paths = Paths_Norm, TargetPG = Extended, GroupsLab = PopID_broad_Fil,
                  n.cores = 4, ClusType = "FORK", alpha = .05,
                  Partition = Part$Partition, PrjStr = PrjStruct, Span = .2,
                  Mode = "MI", ModePar = "min", Features = 27,
                  ReturnGenes = FALSE, ScalePT = TRUE)

## [1] "Feature selection by mutual information"
## [1] "3 pages will be plotted"

## [[1]]

## 
## [[2]]

## 
## [[3]]

We can also further explore the the differences between path 1 and 2

CompareOnBranches(X = ExpMat, Paths = Paths_Norm[c(1,2)], TargetPG = Extended, GroupsLab = PopID_broad_Fil,
                                 n.cores = 4, ClusType = "FORK", TrajCol = TRUE,
                  Partition = Part$Partition, PrjStr = PrjStruct, Span = .2, Mode = "KW", ScalePT = TRUE, Features = 27)

## [1] "Feature selection by Kruskal-Wallis rank sum test on differential branches"
## [1] "3 pages will be plotted"

## [[1]]

## 
## [[2]]

## 
## [[3]]

Using bootstrapping

CompareOnBranches(X = ExpMat, Paths = Paths_Norm, TargetPG = Extended,
                  BootPG = Tree_Ext_Boot[1:(length(Tree_Ext_Boot)-1)],
                  GroupsLab = PopID_broad_Fil,
                  Partition = Part$Partition, PrjStr = PrjStruct, Span = .3, alpha = .05,
                  Features = names(SmoothGenes)[order(SmoothGenes, decreasing = FALSE)[1:9]], ScalePT = TRUE)

## [1] "1 pages will be plotted"

## Warning: Removed 9 rows containing missing values (geom_rect).

Looking at a genes differentially expressed w.r.t. a branching point

which(degree(Net)>2)

## 2 5 
## 2 5

as.integer(neighborhood(Net, 1, 1)[[1]])

## [1]  1  7 51

OnBr <- Part$Partition %in% as.integer(neighborhood(Net, 1, 5)[[1]])
sum(OnBr)

## [1] 95

GenesPV <- apply(ExpMat, 2, function(x) {
  wilcox.test(x[OnBr], x[!OnBr], alternative = "two.sided")$p.value
})

names(sort(GenesPV)[1:10])

##  [1] "Apoe"    "S100a10" "Mfsd2b"  "Gata2"   "Clec4d"  "Vamp5"   "Cdk6"   
##  [8] "St8sia6" "Scin"    "Hdac7"

CompareOnBranches(X = ExpMat, Paths = Paths_Norm, TargetPG = Extended,
                  BootPG = Tree_Ext_Boot[1:(length(Tree_Ext_Boot)-1)],
                  GroupsLab = PopID_broad_Fil,
                  Partition = Part$Partition, PrjStr = PrjStruct,
                  Span = .3, alpha = .05,
                  Features = names(sort(GenesPV)[1:9]), ScalePT = TRUE)

## [1] "1 pages will be plotted"

## Warning: Removed 9 rows containing missing values (geom_rect).

NodesOnPath <- lapply(Paths_Norm, function(x) {
  if(any(as.integer(x)==5)){
    return(as.integer(x))
  }
})

OnBr <- Part$Partition %in% as.integer(neighborhood(Net, 1, 5)[[1]])
sum(OnBr)

## [1] 95

OffBr <- Part$Partition %in% unlist(NodesOnPath)
OffBr <- OffBr & !OnBr

GenesPV <- apply(ExpMat, 2, function(x) {
  wilcox.test(x[OnBr], x[OffBr], alternative = "two")$p.value
})

CompareOnBranches(X = ExpMat, Paths = Paths_Norm[c(1,3)], TargetPG = Extended,
                  BootPG = Tree_Ext_Boot[1:(length(Tree_Ext_Boot)-1)],
                  GroupsLab = PopID_broad_Fil, PlotOrg = TRUE,
                  Partition = Part$Partition, PrjStr = PrjStruct,
                  Span = .3, alpha = .05,
                  Features = names(sort(GenesPV)[1:9]), ScalePT = TRUE)