scGOclust_mouse_fly_gut_vignette

Yuyao Song

2023-05-31

1. Load packages

scGOclust is a package that leverages Gene Ontology to analyse the functional profile of cells with scRNA-seq data.

# load required libraries

library(Seurat)
## Attaching SeuratObject
library(pheatmap)
library(httr)

## if (!require("devtools")) install.packages("devtools")

## install latest from source
## for reproducibility we do not update dependencies
# devtools::install_github("YY-SONG0718/scGOclust", upgrade_dependencies = FALSE)

library(scGOclust)

#

2. Load input data

# Build the gene-to-GO biological process (BP) mapping table for mouse.
# Electronically inferred records are left out by requesting only the
# evidence categories listed below.

# Workaround for a known issue: Ensembl sometimes reports an expired SSL
# certificate. This switches off peer verification via httr's config.
# NOTE(review): the setting is applied through httr::set_config(), so it is
# expected to affect later httr requests in the session too -- consider
# httr::reset_config() once the queries are done.
httr::set_config(httr::config(ssl_verifypeer = FALSE))

mmu_tbl <- ensemblToGo(
  species = "mmusculus",
  GO_linkage_type = c(
    "experimental", "phylogenetic", "computational", "author", "curator"
  )
)
## query biomart
## including GO link types:
## c("EXP", "IDA", "IPI", "IMP", "IGI", "IEP", "HTP", "HDA", "HMP", "HGI", "HEP")c("IBA", "IBD", "IKR", "IRD")c("ISS", "ISO", "ISA", "ISM", "IGC", "RCA")c("TAS", "NAS")c("IC", "ND")
# Gene-to-GO BP mapping table for fruit fly
dme_tbl <- ensemblToGo(
  species = "dmelanogaster",
  GO_linkage_type = c(
    "experimental", "phylogenetic", "computational", "author", "curator"
  )
)
## query biomart
## including GO link types:
## c("EXP", "IDA", "IPI", "IMP", "IGI", "IEP", "HTP", "HDA", "HMP", "HGI", "HEP")c("IBA", "IBD", "IKR", "IRD")c("ISS", "ISO", "ISA", "ISM", "IGC", "RCA")c("TAS", "NAS")c("IC", "ND")
# Load both raw gene expression count objects in a single call,
# then list what is now available in the workspace.
data(mmu_subset, dme_subset)
ls()
## [1] "dme_subset" "dme_tbl"    "mmu_subset" "mmu_tbl"

3. Build GO BP profile

## Build one Seurat object per species, with GO BP terms as features

mmu_go_obj <- makeGOSeurat(
  ensembl_to_GO = mmu_tbl,
  feature_type = "external_gene_name",
  seurat_obj = mmu_subset
)
## collect data
## compute GO to cell matrix, might take a few secs
## time used: 1.74 secs
## returning GO Seurat object
dme_go_obj <- makeGOSeurat(
  ensembl_to_GO = dme_tbl,
  feature_type = "external_gene_name",
  seurat_obj = dme_subset
)
## collect data
## compute GO to cell matrix, might take a few secs
## time used: 0.32 secs
## returning GO Seurat object

4. Calculate cell type average GO BP profile

# cell_type_col names the seurat_obj@meta.data column that holds the
# cell type annotation; the column name differs between the two objects.

mmu_ct_go <- getCellTypeGO(
  go_seurat_obj = mmu_go_obj,
  cell_type_col = "cell_type_annotation"
)
## Centering and scaling data matrix
dme_ct_go <- getCellTypeGO(
  go_seurat_obj = dme_go_obj,
  cell_type_col = "annotation"
)
## Centering and scaling data matrix

5. Calculate within-species cell type functional similarity

# For each species: Pearson's correlation coefficient between cell type
# average BP profiles, drawn as a heatmap.

mmu_corr <- cellTypeGOCorr(
  cell_type_go = mmu_ct_go,
  corr_method = "pearson"
)
pheatmap(mmu_corr)

dme_corr <- cellTypeGOCorr(
  cell_type_go = dme_ct_go,
  corr_method = "pearson"
)
pheatmap(dme_corr)

5. Calculate cross-species cell type functional similarity

# Pearson's correlation coefficient between cell type average BP profiles
# of the two species

corr <- crossSpeciesCellTypeGOCorr(
  species_1 = "mmusculus",
  species_2 = "dmelanogaster",
  cell_type_go_sp1 = mmu_ct_go,
  cell_type_go_sp2 = dme_ct_go,
  corr_method = "pearson"
)

# Heatmap of the cross-species cell type profile correlations
pheatmap(corr, width = 9, height = 10)

# Same heatmap, scaled by column
pheatmap(corr, scale = "column", width = 9, height = 10)

# sheatmap tries to place the cells with higher values on the diagonal,
# which helps when the cross-species cell type similarity signal is
# less clear
slanter::sheatmap(corr + 0.5, width = 9, height = 10)

# Scaling by row or column shows relative similarity
slanter::sheatmap(corr + 0.5, scale = "column", width = 9, height = 10)

6. Dimensional reduction and UMAP visualization of cells with GO profile

# Treat the mouse cell-by-GO BP profile as a count matrix and analyze it
mmu_go_analyzed <- analyzeGOSeurat(
  go_seurat_obj = mmu_go_obj,
  cell_type_col = "cell_type_annotation"
)
## Computing nearest neighbor graph
## Computing SNN
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 219
## Number of edges: 9789
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.4540
## Number of communities: 3
## Elapsed time: 0 seconds
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 12:13:07 UMAP embedding parameters a = 0.9922 b = 1.112
## 12:13:07 Read 219 rows and found 50 numeric columns
## 12:13:07 Using Annoy for neighbor search, n_neighbors = 30
## 12:13:07 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 12:13:07 Writing NN index file to temp file /var/folders/37/wf962dk574750g0xnnlxjwvm0000gp/T//RtmpG1lmWY/file224c291eda6
## 12:13:07 Searching Annoy index using 1 thread, search_k = 3000
## 12:13:07 Annoy recall = 100%
## 12:13:07 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 12:13:08 Initializing from normalized Laplacian + noise (using irlba)
## 12:13:08 Commencing optimization for 500 epochs, with 7780 positive edges
## 12:13:09 Optimization finished
# UMAP plot of the analyzed mouse cell-by-GO BP profile; labels come from
# the cell annotation column in meta.data that was specified earlier
DimPlot(mmu_go_analyzed, label = TRUE) +
  NoLegend()

# Analyze the fly cell-by-GO BP profile in the same way
dme_go_analyzed <- analyzeGOSeurat(
  go_seurat_obj = dme_go_obj,
  cell_type_col = "annotation"
)
## Computing nearest neighbor graph
## Computing SNN
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 180
## Number of edges: 5928
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.5779
## Number of communities: 3
## Elapsed time: 0 seconds
## 12:13:10 UMAP embedding parameters a = 0.9922 b = 1.112
## 12:13:10 Read 180 rows and found 50 numeric columns
## 12:13:10 Using Annoy for neighbor search, n_neighbors = 30
## 12:13:10 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 12:13:10 Writing NN index file to temp file /var/folders/37/wf962dk574750g0xnnlxjwvm0000gp/T//RtmpG1lmWY/file224c5cf91c0c
## 12:13:10 Searching Annoy index using 1 thread, search_k = 3000
## 12:13:10 Annoy recall = 100%
## 12:13:11 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 12:13:12 Initializing from normalized Laplacian + noise (using irlba)
## 12:13:12 Commencing optimization for 500 epochs, with 6632 positive edges
## 12:13:12 Optimization finished
# Labeled plot of the analyzed fly object, legend hidden
DimPlot(dme_go_analyzed, label = TRUE) +
  NoLegend()

7. Get co-up and co-down regulated terms between pairs of cell types

## This step takes a few minutes because of the Wilcoxon signed rank test

ct_shared_go <- getCellTypeSharedGO(
  species_1 = "mmusculus",
  species_2 = "dmelanogaster",
  analyzed_go_seurat_sp1 = mmu_go_analyzed,
  analyzed_go_seurat_sp2 = dme_go_analyzed,
  cell_type_col_sp1 = "cell_type_annotation",
  cell_type_col_sp2 = "annotation"
)

head(ct_shared_go)

# Look up the shared GO terms for one specific pair of cell types
getCellTypeSharedTerms(
  shared_go = ct_shared_go,
  cell_type_sp1 = "intestine_Enteroendocrine cell",
  cell_type_sp2 = "enteroendocrine cell",
  return_full = FALSE
)

# Sankey plot built from the cross-species correlation matrix.
# NOTE(review): the argument spelling `corr_threshould` is kept exactly as
# in the original call -- confirm it against the package signature.
plotCellTypeSankey(corr_matrix = corr, corr_threshould = 0.05)

# Record the R session details for this run
sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] scGOclust_0.1.0    httr_1.4.5         pheatmap_1.0.12    SeuratObject_4.1.3
## [5] Seurat_4.3.0      
## 
## loaded via a namespace (and not attached):
##   [1] utf8_1.2.3             spatstat.explore_3.1-0 reticulate_1.28       
##   [4] tidyselect_1.2.0       RSQLite_2.2.18         AnnotationDbi_1.56.2  
##   [7] htmlwidgets_1.6.2      grid_4.1.2             Rtsne_0.16            
##  [10] devtools_2.4.3         munsell_0.5.0          codetools_0.2-18      
##  [13] ica_1.0-3              future_1.32.0          miniUI_0.1.1.1        
##  [16] withr_2.5.0            spatstat.random_3.1-4  colorspace_2.1-0      
##  [19] progressr_0.13.0       Biobase_2.54.0         filelock_1.0.2        
##  [22] highr_0.10             knitr_1.42             rstudioapi_0.13       
##  [25] stats4_4.1.2           ROCR_1.0-11            tensor_1.5            
##  [28] listenv_0.9.0          labeling_0.4.2         GenomeInfoDbData_1.2.7
##  [31] polyclip_1.10-4        farver_2.1.1           bit64_4.0.5           
##  [34] rprojroot_2.0.3        parallelly_1.35.0      vctrs_0.6.1           
##  [37] generics_0.1.3         xfun_0.38              BiocFileCache_2.2.1   
##  [40] R6_2.5.1               GenomeInfoDb_1.30.1    bitops_1.0-7          
##  [43] spatstat.utils_3.0-2   cachem_1.0.7           assertthat_0.2.1      
##  [46] networkD3_0.4          promises_1.2.0.1       scales_1.2.1          
##  [49] gtable_0.3.3           globals_0.16.2         processx_3.5.3        
##  [52] goftest_1.2-3          rlang_1.1.0            slanter_0.2-0         
##  [55] splines_4.1.2          lazyeval_0.2.2         spatstat.geom_3.1-0   
##  [58] yaml_2.3.7             reshape2_1.4.4         abind_1.4-5           
##  [61] httpuv_1.6.9           tools_4.1.2            usethis_2.1.5         
##  [64] ggplot2_3.4.1          ellipsis_0.3.2         jquerylib_0.1.4       
##  [67] RColorBrewer_1.1-3     BiocGenerics_0.40.0    sessioninfo_1.2.2     
##  [70] ggridges_0.5.4         Rcpp_1.0.10            plyr_1.8.8            
##  [73] progress_1.2.2         zlibbioc_1.40.0        purrr_1.0.1           
##  [76] RCurl_1.98-1.6         ps_1.7.0               prettyunits_1.1.1     
##  [79] deldir_1.0-6           pbapply_1.7-0          cowplot_1.1.1         
##  [82] S4Vectors_0.32.4       zoo_1.8-11             ggrepel_0.9.3         
##  [85] cluster_2.1.3          fs_1.6.1               magrittr_2.0.3        
##  [88] data.table_1.14.8      scattermore_0.8        lmtest_0.9-40         
##  [91] RANN_2.6.1             fitdistrplus_1.1-8     matrixStats_0.63.0    
##  [94] pkgload_1.2.4          hms_1.1.1              patchwork_1.1.2       
##  [97] mime_0.12              evaluate_0.20          xtable_1.8-4          
## [100] XML_3.99-0.9           IRanges_2.28.0         gridExtra_2.3         
## [103] testthat_3.1.4         compiler_4.1.2         biomaRt_2.50.3        
## [106] tibble_3.2.1           KernSmooth_2.23-20     crayon_1.5.2          
## [109] htmltools_0.5.5        later_1.3.0            tidyr_1.3.0           
## [112] DBI_1.1.2              dbplyr_2.1.1           rappdirs_0.3.3        
## [115] MASS_7.3-57            Matrix_1.5-1           brio_1.1.3            
## [118] cli_3.6.1              parallel_4.1.2         igraph_1.4.1          
## [121] pkgconfig_2.0.3        sp_1.6-0               plotly_4.10.1         
## [124] spatstat.sparse_3.0-1  xml2_1.3.3             bslib_0.4.2           
## [127] XVector_0.34.0         stringr_1.5.0          callr_3.7.0           
## [130] digest_0.6.31          pracma_2.4.2           sctransform_0.3.5     
## [133] RcppAnnoy_0.0.20       spatstat.data_3.0-1    Biostrings_2.62.0     
## [136] rmarkdown_2.21         leiden_0.4.3           uwot_0.1.14           
## [139] curl_5.0.0             shiny_1.7.4            lifecycle_1.0.3       
## [142] nlme_3.1-157           jsonlite_1.8.4         desc_1.4.1            
## [145] viridisLite_0.4.1      limma_3.50.3           fansi_1.0.4           
## [148] pillar_1.9.0           lattice_0.20-45        KEGGREST_1.34.0       
## [151] fastmap_1.1.1          pkgbuild_1.3.1         survival_3.3-1        
## [154] glue_1.6.2             remotes_2.4.2          png_0.1-8             
## [157] bit_4.0.4              stringi_1.7.12         sass_0.4.5            
## [160] blob_1.2.3             memoise_2.0.1          dplyr_1.1.1           
## [163] irlba_2.3.5.1          future.apply_1.10.0