library(RTCGA.PANCAN12)
library(factorMerger)
# Load the two expression tables and the clinical table.
data("expression.cb1")
data("expression.cb2")
data("clinical.cb")

# Stack the two expression tables, use the first column as row names, then
# transpose so that those identifiers become the columns and the former
# columns become the rows.
expression.cb <- rbind(expression.cb1, expression.cb2)
rownames(expression.cb) <- expression.cb[, 1]
expression.cb <- data.frame(t(expression.cb[, -1]))

# Build the join key from the row names by replacing every literal "." with
# "-" (presumably to match the sampleID format used in clinical.cb -- confirm).
expression.cb$sampleID <- gsub(
  ".", "-", rownames(expression.cb),
  fixed = TRUE
)

# Keep every column whose name contains "HSP", plus the join key.
selectedCols <- c(
  grep("HSP", colnames(expression.cb), value = TRUE),
  "sampleID"
)

# Attach the cohort label of each sample; merge() keeps only the sample IDs
# present in both tables.
selected <- merge(
  x = expression.cb[, selectedCols],
  y = clinical.cb[, c("sampleID", "X_cohort")],
  by = "sampleID"
)

# Cancer is the cohort label from its 6th character onwards (up to character
# 100), i.e. the label without its first five characters.
selected$Cancer <- substr(
  as.character(selected$X_cohort),
  start = 6, stop = 100
)
library(ggplot2)
# Horizontal violin plot of HSPA12B by cancer type.
ggplot(data = selected, mapping = aes(x = Cancer, y = HSPA12B)) +
  geom_violin() +
  coord_flip()

# One-way ANOVA table for HSPA12B explained by cancer type.
anova(lm(formula = HSPA12B ~ Cancer, data = selected))
#> Analysis of Variance Table
#>
#> Response: HSPA12B
#> Df Sum Sq Mean Sq F value Pr(>F)
#> Cancer 12 2336.3 194.692 177.69 < 2.2e-16 ***
#> Residuals 3585 3928.0 1.096
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Coefficient-level summary of the same linear model (HSPA12B by cancer type).
summary(lm(formula = HSPA12B ~ Cancer, data = selected))
#>
#> Call:
#> lm(formula = HSPA12B ~ Cancer, data = selected)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -4.3866 -0.6535 0.0236 0.6922 4.1969
#>
#> Coefficients:
#> Estimate Std. Error
#> (Intercept) -1.96185 0.07958
#> CancerBladder Cancer 1.65407 0.12375
#> CancerBreast Cancer 2.46959 0.08739
#> CancerColon Cancer 1.40436 0.11000
#> CancerEndometrioid Cancer 1.38728 0.09641
#> CancerFormalin Fixed Paraffin-Embedded Pilot Phase II 2.71519 0.31247
#> CancerGlioblastoma 2.28696 0.11373
#> CancerHead and Neck Cancer 1.68020 0.09975
#> CancerKidney Clear Cell Carcinoma 3.42967 0.09293
#> CancerLung Adenocarcinoma 1.85849 0.09724
#> CancerLung Squamous Cell Carcinoma 1.44430 0.10286
#> CancerOvarian Cancer 1.35578 0.10231
#> CancerRectal Cancer 1.46895 0.14680
#> t value Pr(>|t|)
#> (Intercept) -24.652 <2e-16 ***
#> CancerBladder Cancer 13.366 <2e-16 ***
#> CancerBreast Cancer 28.258 <2e-16 ***
#> CancerColon Cancer 12.767 <2e-16 ***
#> CancerEndometrioid Cancer 14.390 <2e-16 ***
#> CancerFormalin Fixed Paraffin-Embedded Pilot Phase II 8.689 <2e-16 ***
#> CancerGlioblastoma 20.109 <2e-16 ***
#> CancerHead and Neck Cancer 16.845 <2e-16 ***
#> CancerKidney Clear Cell Carcinoma 36.908 <2e-16 ***
#> CancerLung Adenocarcinoma 19.113 <2e-16 ***
#> CancerLung Squamous Cell Carcinoma 14.041 <2e-16 ***
#> CancerOvarian Cancer 13.251 <2e-16 ***
#> CancerRectal Cancer 10.006 <2e-16 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 1.047 on 3585 degrees of freedom
#> (1 observation deleted due to missingness)
#> Multiple R-squared: 0.373, Adjusted R-squared: 0.3709
#> F-statistic: 177.7 on 12 and 3585 DF, p-value: < 2.2e-16
library(agricolae)
# Post-hoc LSD test on the HSPA12B ~ Cancer ANOVA fit, with Bonferroni
# adjustment of the p-values.
lsdResult <- LSD.test(
  aov(formula = HSPA12B ~ Cancer, data = selected),
  trt = "Cancer",
  p.adj = "bonferroni"
)

# Show the group table of the test result.
lsdResult[["groups"]]
#> trt means M
#> 1 Kidney Clear Cell Carcinoma 1.4678151 a
#> 2 Formalin Fixed Paraffin-Embedded Pilot Phase II 0.7533333 ab
#> 3 Breast Cancer 0.5077381 b
#> 4 Glioblastoma 0.3251024 b
#> 5 Lung Adenocarcinoma -0.1033618 bc
#> 6 Head and Neck Cancer -0.2816502 bcd
#> 7 Bladder Cancer -0.3077869 bcde
#> 8 Rectal Cancer -0.4929067 cde
#> 9 Lung Squamous Cell Carcinoma -0.5175581 de
#> 10 Colon Cancer -0.5574900 de
#> 11 Endometrioid Cancer -0.5745711 e
#> 12 Ovarian Cancer -0.6060749 e
#> 13 Acute Myeloid Leukemia -1.9618542 f
# Merge the levels of Cancer with respect to a single response.
# Note: the response passed here is HSPA12A, whereas the models and the LSD
# test above use HSPA12B.
merging <- mergeFactors(
  selected$HSPA12A,
  factor(selected$Cancer),
  subsequent = TRUE
)

# Print the merging result.
merging
#> Factor levels were recoded as below:
#>
#> recoded original
#> --------- ------------------------------------------------
#> (AcML) Acute Myeloid Leukemia
#> (BldC) Bladder Cancer
#> (BrsC) Breast Cancer
#> (ClnC) Colon Cancer
#> (EndC) Endometrioid Cancer
#> (FFPPPI) Formalin Fixed Paraffin-Embedded Pilot Phase II
#> (Glbl) Glioblastoma
#> (HaNC) Head and Neck Cancer
#> (KCCC) Kidney Clear Cell Carcinoma
#> (LngA) Lung Adenocarcinoma
#> (LSCC) Lung Squamous Cell Carcinoma
#> (OvrC) Ovarian Cancer
#> (RctC) Rectal Cancer
#>
#>
#>
#> groupA groupB model pval
#> --------------------------------------- --------------------------------------------- ---------- -------
#> -5560.944 1.0000
#> (RctC) (ClnC) -5560.944 0.9953
#> (BrsC) (FFPPPI) -5560.988 0.7680
#> (EndC) (BrsC)(FFPPPI) -5561.724 0.2257
#> (RctC)(ClnC) (LSCC) -5563.653 0.0499
#> (BldC) (LngA) -5565.844 0.0366
#> (Glbl) (OvrC) -5568.162 0.0315
#> (HaNC) (EndC)(BrsC)(FFPPPI) -5574.389 0.0004
#> (BldC)(LngA) (RctC)(ClnC)(LSCC) -5589.883 0.0000
#> (HaNC)(EndC)(BrsC)(FFPPPI) (Glbl)(OvrC) -5670.261 0.0000
#> (AcML) (BldC)(LngA)(RctC)(ClnC)(LSCC) -5775.098 0.0000
#> (HaNC)(EndC)(BrsC)(FFPPPI)(Glbl)(OvrC) (KCCC) -6172.380 0.0000
#> (AcML)(BldC)(LngA)(RctC)(ClnC)(LSCC) (HaNC)(EndC)(BrsC)(FFPPPI)(Glbl)(OvrC)(KCCC) -6758.771 0.0000
# Two tree plots of the single-response merging: one with simplify = FALSE,
# one with "pval" passed as the second argument.
plotTree(merging, simplify = FALSE)
plotTree(merging, "pval")

# Multi-response input: every column of `selected` except the sample ID and
# the two cohort columns.
df <- subset(
  selected,
  select = -c(sampleID, X_cohort, Cancer)
)
cancer <- as.factor(selected$Cancer)

# Redo the merging with all remaining columns as the response; this
# overwrites the single-response `merging` object created earlier.
merging <- mergeFactors(df, cancer, subsequent = TRUE)

# Append a profile plot, then a heatmap, to the merging tree; in both the
# x-axis labels are rotated by 90 degrees.
appendToTree(
  merging,
  plotProfile(merging) +
    theme(axis.text.x = element_text(angle = 90, hjust = 1))
)
appendToTree(
  merging,
  plotHeatmap(merging) +
    theme(axis.text.x = element_text(angle = 90, hjust = 1))
)