scGOclust is a package that leverages Gene Ontology to
analyse the functional profile of cells with scRNA-seq data.
# load required libraries
library(Seurat)
## Attaching SeuratObject
library(pheatmap)
library(httr)
## if (!require("devtools")) install.packages("devtools")
## install latest from source
## for reproducibility we do not update dependencies
# devtools::install_github("YY-SONG0718/scGOclust", upgrade_dependencies = FALSE)
library(scGOclust)
#
# get a gene to GO BP terms mapping table
# remove electronically inferred records
# Ensembl sometimes reports that its SSL certificate has expired (a known
# issue). If the BioMart query fails for that reason, run the line below.
# NOTE: this switches off SSL peer verification in the httr configuration,
# so treat it as a temporary workaround rather than a permanent setting.
httr::set_config(httr::config(ssl_verifypeer = FALSE))
# Query the gene-to-GO table for mouse, requesting the listed GO linkage
# (evidence) categories.
mmu_tbl <- ensemblToGo(
  species = 'mmusculus',
  GO_linkage_type = c(
    'experimental', 'phylogenetic', 'computational', 'author', 'curator'
  )
)
## query biomart
## including GO link types:
## c("EXP", "IDA", "IPI", "IMP", "IGI", "IEP", "HTP", "HDA", "HMP", "HGI", "HEP")c("IBA", "IBD", "IKR", "IRD")c("ISS", "ISO", "ISA", "ISM", "IGC", "RCA")c("TAS", "NAS")c("IC", "ND")
# Same query, with the same linkage categories, for fruit fly.
dme_tbl <- ensemblToGo(
  species = 'dmelanogaster',
  GO_linkage_type = c(
    'experimental', 'phylogenetic', 'computational', 'author', 'curator'
  )
)
## query biomart
## including GO link types:
## c("EXP", "IDA", "IPI", "IMP", "IGI", "IEP", "HTP", "HDA", "HMP", "HGI", "HEP")c("IBA", "IBD", "IKR", "IRD")c("ISS", "ISO", "ISA", "ISM", "IGC", "RCA")c("TAS", "NAS")c("IC", "ND")
# load the raw gene expression count objects for both species in one call
data(list = c("mmu_subset", "dme_subset"))
# show which objects are now available in the workspace
ls()
## [1] "dme_subset" "dme_tbl" "mmu_subset" "mmu_tbl"
## construct Seurat objects with GO BP terms as features, one per species;
## both calls pass 'external_gene_name' as the feature type
mmu_go_obj <- makeGOSeurat(
  ensembl_to_GO = mmu_tbl,
  feature_type = 'external_gene_name',
  seurat_obj = mmu_subset
)
## collect data
## compute GO to cell matrix, might take a few secs
## time used: 1.74 secs
## returning GO Seurat object
dme_go_obj <- makeGOSeurat(
  ensembl_to_GO = dme_tbl,
  feature_type = 'external_gene_name',
  seurat_obj = dme_subset
)
## collect data
## compute GO to cell matrix, might take a few secs
## time used: 0.32 secs
## returning GO Seurat object
# specify the column with cell type annotation in seurat_obj@meta.data;
# the column name differs between the mouse and fly objects
mmu_ct_go <- getCellTypeGO(
  go_seurat_obj = mmu_go_obj,
  cell_type_col = 'cell_type_annotation'
)
## Centering and scaling data matrix
dme_ct_go <- getCellTypeGO(
  go_seurat_obj = dme_go_obj,
  cell_type_col = 'annotation'
)
## Centering and scaling data matrix
# heatmap of Pearson's correlation coefficient of cell type average BP
# profiles within species: first mouse, then fly
mmu_corr <- cellTypeGOCorr(cell_type_go = mmu_ct_go, corr_method = 'pearson')
pheatmap(mmu_corr)

dme_corr <- cellTypeGOCorr(cell_type_go = dme_ct_go, corr_method = 'pearson')
pheatmap(dme_corr)
# calculate Pearson's correlation coefficient of cell type average BP
# profiles across species (species 1 = mouse, species 2 = fly)
corr <- crossSpeciesCellTypeGOCorr(
  species_1 = 'mmusculus',
  species_2 = 'dmelanogaster',
  cell_type_go_sp1 = mmu_ct_go,
  cell_type_go_sp2 = dme_ct_go,
  corr_method = 'pearson'
)

# cross-species cell type profile heatmap: raw values first, then scaled
# by column
pheatmap(corr, width = 9, height = 10)
pheatmap(corr, scale = 'column', width = 9, height = 10)
# sheatmap tries to put cells with higher values on the diagonal
# helpful when cross-species cell type similarity signal is less clear
# NOTE(review): the + 0.5 offset presumably keeps the matrix non-negative
# for slanter -- confirm before changing it
slanter::sheatmap((corr + 0.5), width = 9, height = 10)

# scale by row or column to see relative similarity
slanter::sheatmap((corr + 0.5), scale = 'column', width = 9, height = 10)
# analyze the cell-by-GO BP profile as a count matrix (mouse);
# `cell_type_col` names the annotation column in the object's meta.data
mmu_go_analyzed <- analyzeGOSeurat(
  go_seurat_obj = mmu_go_obj,
  cell_type_col = 'cell_type_annotation'
)
## Computing nearest neighbor graph
## Computing SNN
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 219
## Number of edges: 9789
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.4540
## Number of communities: 3
## Elapsed time: 0 seconds
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 12:13:07 UMAP embedding parameters a = 0.9922 b = 1.112
## 12:13:07 Read 219 rows and found 50 numeric columns
## 12:13:07 Using Annoy for neighbor search, n_neighbors = 30
## 12:13:07 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 12:13:07 Writing NN index file to temp file /var/folders/37/wf962dk574750g0xnnlxjwvm0000gp/T//RtmpG1lmWY/file224c291eda6
## 12:13:07 Searching Annoy index using 1 thread, search_k = 3000
## 12:13:07 Annoy recall = 100%
## 12:13:07 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 12:13:08 Initializing from normalized Laplacian + noise (using irlba)
## 12:13:08 Commencing optimization for 500 epochs, with 7780 positive edges
## 12:13:09 Optimization finished
# UMAP plot of the analyzed cell-by-GO BP profile
# labeled by previously specified cell annotation column in meta.data
DimPlot(mmu_go_analyzed, label = TRUE) + NoLegend()

# run the same analysis on the fly GO object, using its own annotation column
dme_go_analyzed <- analyzeGOSeurat(
  go_seurat_obj = dme_go_obj,
  cell_type_col = 'annotation'
)
## Computing nearest neighbor graph
## Computing SNN
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 180
## Number of edges: 5928
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.5779
## Number of communities: 3
## Elapsed time: 0 seconds
## 12:13:10 UMAP embedding parameters a = 0.9922 b = 1.112
## 12:13:10 Read 180 rows and found 50 numeric columns
## 12:13:10 Using Annoy for neighbor search, n_neighbors = 30
## 12:13:10 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 12:13:10 Writing NN index file to temp file /var/folders/37/wf962dk574750g0xnnlxjwvm0000gp/T//RtmpG1lmWY/file224c5cf91c0c
## 12:13:10 Searching Annoy index using 1 thread, search_k = 3000
## 12:13:10 Annoy recall = 100%
## 12:13:11 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 12:13:12 Initializing from normalized Laplacian + noise (using irlba)
## 12:13:12 Commencing optimization for 500 epochs, with 6632 positive edges
## 12:13:12 Optimization finished
# plot the analyzed fly object, labeled and without a legend
DimPlot(dme_go_analyzed, label = TRUE) + NoLegend()

## calculation takes a few minutes due to the Wilcoxon signed rank test
ct_shared_go <- getCellTypeSharedGO(
  species_1 = 'mmusculus',
  species_2 = 'dmelanogaster',
  analyzed_go_seurat_sp1 = mmu_go_analyzed,
  analyzed_go_seurat_sp2 = dme_go_analyzed,
  cell_type_col_sp1 = 'cell_type_annotation',
  cell_type_col_sp2 = 'annotation'
)

# preview the first entries of the result
head(ct_shared_go)
# look up the shared GO terms for one specific pair of cell types
# (one cell type from species 1, one from species 2)
getCellTypeSharedTerms(
  shared_go = ct_shared_go,
  cell_type_sp1 = 'intestine_Enteroendocrine cell',
  cell_type_sp2 = 'enteroendocrine cell',
  return_full = FALSE
)

# Sankey plot built from the cross-species correlation matrix
# NOTE(review): the argument is spelled `corr_threshould` in this call;
# confirm against the package signature before renaming it
plotCellTypeSankey(corr_matrix = corr, corr_threshould = 0.05)

# record the R session details
sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] scGOclust_0.1.0 httr_1.4.5 pheatmap_1.0.12 SeuratObject_4.1.3
## [5] Seurat_4.3.0
##
## loaded via a namespace (and not attached):
## [1] utf8_1.2.3 spatstat.explore_3.1-0 reticulate_1.28
## [4] tidyselect_1.2.0 RSQLite_2.2.18 AnnotationDbi_1.56.2
## [7] htmlwidgets_1.6.2 grid_4.1.2 Rtsne_0.16
## [10] devtools_2.4.3 munsell_0.5.0 codetools_0.2-18
## [13] ica_1.0-3 future_1.32.0 miniUI_0.1.1.1
## [16] withr_2.5.0 spatstat.random_3.1-4 colorspace_2.1-0
## [19] progressr_0.13.0 Biobase_2.54.0 filelock_1.0.2
## [22] highr_0.10 knitr_1.42 rstudioapi_0.13
## [25] stats4_4.1.2 ROCR_1.0-11 tensor_1.5
## [28] listenv_0.9.0 labeling_0.4.2 GenomeInfoDbData_1.2.7
## [31] polyclip_1.10-4 farver_2.1.1 bit64_4.0.5
## [34] rprojroot_2.0.3 parallelly_1.35.0 vctrs_0.6.1
## [37] generics_0.1.3 xfun_0.38 BiocFileCache_2.2.1
## [40] R6_2.5.1 GenomeInfoDb_1.30.1 bitops_1.0-7
## [43] spatstat.utils_3.0-2 cachem_1.0.7 assertthat_0.2.1
## [46] networkD3_0.4 promises_1.2.0.1 scales_1.2.1
## [49] gtable_0.3.3 globals_0.16.2 processx_3.5.3
## [52] goftest_1.2-3 rlang_1.1.0 slanter_0.2-0
## [55] splines_4.1.2 lazyeval_0.2.2 spatstat.geom_3.1-0
## [58] yaml_2.3.7 reshape2_1.4.4 abind_1.4-5
## [61] httpuv_1.6.9 tools_4.1.2 usethis_2.1.5
## [64] ggplot2_3.4.1 ellipsis_0.3.2 jquerylib_0.1.4
## [67] RColorBrewer_1.1-3 BiocGenerics_0.40.0 sessioninfo_1.2.2
## [70] ggridges_0.5.4 Rcpp_1.0.10 plyr_1.8.8
## [73] progress_1.2.2 zlibbioc_1.40.0 purrr_1.0.1
## [76] RCurl_1.98-1.6 ps_1.7.0 prettyunits_1.1.1
## [79] deldir_1.0-6 pbapply_1.7-0 cowplot_1.1.1
## [82] S4Vectors_0.32.4 zoo_1.8-11 ggrepel_0.9.3
## [85] cluster_2.1.3 fs_1.6.1 magrittr_2.0.3
## [88] data.table_1.14.8 scattermore_0.8 lmtest_0.9-40
## [91] RANN_2.6.1 fitdistrplus_1.1-8 matrixStats_0.63.0
## [94] pkgload_1.2.4 hms_1.1.1 patchwork_1.1.2
## [97] mime_0.12 evaluate_0.20 xtable_1.8-4
## [100] XML_3.99-0.9 IRanges_2.28.0 gridExtra_2.3
## [103] testthat_3.1.4 compiler_4.1.2 biomaRt_2.50.3
## [106] tibble_3.2.1 KernSmooth_2.23-20 crayon_1.5.2
## [109] htmltools_0.5.5 later_1.3.0 tidyr_1.3.0
## [112] DBI_1.1.2 dbplyr_2.1.1 rappdirs_0.3.3
## [115] MASS_7.3-57 Matrix_1.5-1 brio_1.1.3
## [118] cli_3.6.1 parallel_4.1.2 igraph_1.4.1
## [121] pkgconfig_2.0.3 sp_1.6-0 plotly_4.10.1
## [124] spatstat.sparse_3.0-1 xml2_1.3.3 bslib_0.4.2
## [127] XVector_0.34.0 stringr_1.5.0 callr_3.7.0
## [130] digest_0.6.31 pracma_2.4.2 sctransform_0.3.5
## [133] RcppAnnoy_0.0.20 spatstat.data_3.0-1 Biostrings_2.62.0
## [136] rmarkdown_2.21 leiden_0.4.3 uwot_0.1.14
## [139] curl_5.0.0 shiny_1.7.4 lifecycle_1.0.3
## [142] nlme_3.1-157 jsonlite_1.8.4 desc_1.4.1
## [145] viridisLite_0.4.1 limma_3.50.3 fansi_1.0.4
## [148] pillar_1.9.0 lattice_0.20-45 KEGGREST_1.34.0
## [151] fastmap_1.1.1 pkgbuild_1.3.1 survival_3.3-1
## [154] glue_1.6.2 remotes_2.4.2 png_0.1-8
## [157] bit_4.0.4 stringi_1.7.12 sass_0.4.5
## [160] blob_1.2.3 memoise_2.0.1 dplyr_1.1.1
## [163] irlba_2.3.5.1 future.apply_1.10.0