Mosaic plots with ggplot2

Haley Jeppson and Heike Hofmann

2016-12-29

Introduction

Basic Explanation of ggmosaic

Creation of ggmosaic

ggmosaic was created primarily using ggproto and the productplots package

ggproto allows you to extend ggplot2 from within your own packages

ggplot2 limitations

ggplot2 is not capable of handling a variable number of variables

The product function creates limitations on the values the variables can take and on what the labels of the variables can be. When the variables are combined, the values, variable name, and level are separated using “:”, “-”, and “.”

If any of the variable names or values of the variable contain one of those 3 symbols, the function will break

Current solution: an option to redefine what symbols are used as separators

Default separators:

# Query the separators currently in effect; the echoed output follows.
get.separators()
## sep1 sep2 sep3 
##  ":"  "-"  "."

Separators redefined:

# Replace the three separators with ":", ";" and "|", so that "-" and "."
# are no longer treated as separator symbols.
set.separators(c(":", ";", "|"))

These limitations also lead to issues with the labeling, but those can be manually fixed.

geom_mosaic: setting the aesthetics

Aesthetics that can be set:

These values are then sent through productplots functions to create the formula for the desired distribution

Formula: weight ~ fill + x | conds

From the aesthetics to the formula

Example of how the formula is built

  • weight = 1
  • x = product(Y, X)
  • fill = W
  • conds = Z

These aesthetics set up the formula for the distribution:

Formula: 1 ~ W + Y + X | Z

Because a mosaic plot is constructed hierarchically through alternating spines, the ordering of the variables is very important.

Weight ~ X

# Use ":", ";" and "|" as the separators for this plot.
set.separators(c(":", ";", "|"))

# Single-variable mosaic: SleepHrsNight on x, weighted by the Weight column,
# with tiles filled by the same variable.
ggplot(data = NHANES) +
  geom_mosaic(
    aes(
      weight = Weight,
      x = product(SleepHrsNight),
      fill = factor(SleepHrsNight)
    ),
    na.rm = TRUE
  ) +
  labs(x = "Hours of sleep a night ", title = "f(SleepHrsNight)") +
  guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE))

Weight ~ Y + X

# Use ":", ";" and "|" as the separators for this plot.
set.separators(c(":", ";", "|"))

# Two-variable mosaic: SleepHrsNight and AgeDecade combined through product(),
# weighted by the Weight column and filled by SleepHrsNight.
ggplot(data = NHANES) +
  geom_mosaic(
    aes(
      weight = Weight,
      x = product(SleepHrsNight, AgeDecade),
      fill = factor(SleepHrsNight)
    ),
    na.rm = TRUE
  ) +
  # Tilt the x-axis labels.
  theme(axis.text.x = element_text(angle = -25, hjust = 0.1)) +
  labs(
    x = "Age in Decades ",
    title = "f(SleepHrsNight | AgeDecade) f(AgeDecade)"
  ) +
  guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE))

Weight ~ X + Y | Z

# This chunk uses "_" (instead of "|") as the third separator.
set.separators(c(":", ";", "_"))

# Mosaic of SleepHrsNight and AgeDecade, conditioned on Gender through the
# conds aesthetic and drawn with the mosaic("v") divider.
ggplot(data = NHANES) +
  geom_mosaic(
    aes(
      x = product(SleepHrsNight, AgeDecade),
      fill = factor(SleepHrsNight),
      conds = product(Gender)
    ),
    na.rm = TRUE,
    divider = mosaic("v")
  ) +
  # Tilt the x-axis labels.
  theme(axis.text.x = element_text(angle = -25, hjust = 0.1)) +
  labs(
    x = "Age in Decades ",
    title = "f(SleepHrsNight, AgeDecade | Gender)"
  ) +
  guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE))

Alternative to conditioning: facetting

# Same two-variable mosaic, but split by Gender with facet_grid() rather
# than with the conds aesthetic.
ggplot(data = NHANES) +
  geom_mosaic(
    aes(
      x = product(SleepHrsNight, AgeDecade),
      fill = factor(SleepHrsNight)
    ),
    na.rm = TRUE
  ) +
  # Tilt the x-axis labels.
  theme(axis.text.x = element_text(angle = -25, hjust = 0.1)) +
  labs(
    x = "Age in Decades ",
    title = "f(SleepHrsNight, AgeDecade | Gender)"
  ) +
  # One facet row per Gender level.
  facet_grid(Gender ~ .) +
  guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE))

Importance of ordering

# SleepHrsNight listed first inside product(), Gender second.
order1 <- ggplot(data = NHANES) +
  geom_mosaic(
    aes(
      weight = Weight,
      x = product(SleepHrsNight, Gender),
      fill = factor(SleepHrsNight)
    ),
    na.rm = TRUE,
    offset = 0.015
  ) +
  labs(x = "Gender ", title = "f(SleepHrsNight | Gender)  f(Gender)") +
  guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE)) +
  theme(plot.title = element_text(size = rel(1)))

# Same variables with the order inside product() swapped; the coordinates
# are flipped afterwards.
order2 <- ggplot(data = NHANES) +
  geom_mosaic(
    aes(
      weight = Weight,
      x = product(Gender, SleepHrsNight),
      fill = factor(SleepHrsNight)
    ),
    na.rm = TRUE,
    offset = 0.015
  ) +
  labs(
    x = "",
    y = "Gender",
    title = "f(Gender | SleepHrsNight)  f(SleepHrsNight)"
  ) +
  guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE)) +
  theme(plot.title = element_text(size = rel(1))) +
  coord_flip()

# Show both orderings side by side with one legend on the right.
grid_arrange_shared_legend(
  order1, order2,
  ncol = 2, nrow = 1, position = "right"
)

Other features of geom_mosaic

Arguments unique to geom_mosaic:

Divider function: Types of partitioning

Four options available for each partition:

# Use ":", ";" and "|" as the separators for these plots.
set.separators(c(":", ";", "|"))

# One plot per divider type, each on the single variable SleepHrsNight.
# Legends are hidden on the individual plots.

# divider = "hbar"
a2 <- ggplot(data = NHANES) +
  geom_mosaic(
    aes(x = product(SleepHrsNight), fill = factor(SleepHrsNight)),
    divider = "hbar",
    na.rm = TRUE
  ) +
  theme(legend.position = "none") +
  labs(x = " ", title = 'divider= "hbar"') +
  guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE))

# divider = "hspine"
a1 <- ggplot(data = NHANES) +
  geom_mosaic(
    aes(x = product(SleepHrsNight), fill = factor(SleepHrsNight)),
    divider = "hspine",
    na.rm = TRUE
  ) +
  theme(legend.position = "none") +
  labs(x = " ", title = 'divider= "hspine"') +
  guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE))

# divider = "vbar"
b2 <- ggplot(data = NHANES) +
  geom_mosaic(
    aes(x = product(SleepHrsNight), fill = factor(SleepHrsNight)),
    divider = "vbar",
    na.rm = TRUE
  ) +
  theme(legend.position = "none") +
  labs(y = " ", x = "", title = 'divider= "vbar"') +
  guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE))

# divider = "vspine"
b1 <- ggplot(data = NHANES) +
  geom_mosaic(
    aes(x = product(SleepHrsNight), fill = factor(SleepHrsNight)),
    divider = "vspine",
    na.rm = TRUE
  ) +
  theme(legend.position = "none") +
  labs(y = " ", x = "", title = 'divider= "vspine"') +
  guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE))

# 2 x 2 arrangement with one shared legend on the right.
grid_arrange_shared_legend(
  a1, a2, b1, b2,
  ncol = 2, nrow = 2, position = "right"
)

Partitioning with one or more variables

# Use ":", ";" and "|" as the separators for these plots.
set.separators(c(":", ";", "|"))

# Three-variable mosaics that differ only in the divider argument.
# na.rm is spelled TRUE rather than T: T is an ordinary variable that can be
# reassigned, whereas TRUE is a reserved word.

# divider = mosaic("h")
m1 <- ggplot(data = NHANES) +
  geom_mosaic(
    aes(
      x = product(SleepHrsNight, Gender, AgeDecade),
      fill = factor(SleepHrsNight)
    ),
    na.rm = TRUE,
    divider = mosaic("h")
  ) +
  theme(axis.text.x = element_blank(), legend.position = "none") +
  labs(x = " ", title = "divider= mosaic()") +
  guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE))

# divider = mosaic("v")
m2 <- ggplot(data = NHANES) +
  geom_mosaic(
    aes(
      x = product(SleepHrsNight, Gender, AgeDecade),
      fill = factor(SleepHrsNight)
    ),
    na.rm = TRUE,
    divider = mosaic("v")
  ) +
  theme(axis.text.x = element_blank(), legend.position = "none") +
  labs(x = " ", title = 'divider= mosaic("v")') +
  guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE))

# divider = ddecker()
m3 <- ggplot(data = NHANES) +
  geom_mosaic(
    aes(
      x = product(SleepHrsNight, Gender, AgeDecade),
      fill = factor(SleepHrsNight)
    ),
    na.rm = TRUE,
    divider = ddecker()
  ) +
  theme(axis.text.x = element_blank(), legend.position = "none") +
  labs(x = " ", title = "divider= ddecker()") +
  guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE))

# Three plots in one row with one shared legend on the right.
grid_arrange_shared_legend(
  m1, m2, m3,
  ncol = 3, nrow = 1, position = "right"
)

# Three-variable mosaics where the divider is given as a character vector
# with one entry per variable.
# na.rm is spelled TRUE rather than T: T is an ordinary variable that can be
# reassigned, whereas TRUE is a reserved word.

m4 <- ggplot(data = NHANES) +
  geom_mosaic(
    aes(
      x = product(SleepHrsNight, Gender, AgeDecade),
      fill = factor(SleepHrsNight)
    ),
    na.rm = TRUE,
    divider = c("vspine", "vspine", "hbar")
  ) +
  theme(axis.text.x = element_blank(), legend.position = "none") +
  labs(x = " ", title = 'divider= c("vspine", "vspine", "hbar")') +
  guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE))

m5 <- ggplot(data = NHANES) +
  geom_mosaic(
    aes(
      x = product(SleepHrsNight, Gender, AgeDecade),
      fill = factor(SleepHrsNight)
    ),
    na.rm = TRUE,
    divider = c("hbar", "vspine", "hbar")
  ) +
  theme(axis.text.x = element_blank(), legend.position = "none") +
  labs(x = " ", title = 'divider= c("hbar", "vspine", "hbar")') +
  guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE))

m6 <- ggplot(data = NHANES) +
  geom_mosaic(
    aes(
      x = product(SleepHrsNight, Gender, AgeDecade),
      fill = factor(SleepHrsNight)
    ),
    na.rm = TRUE,
    divider = c("hspine", "hspine", "hspine")
  ) +
  theme(axis.text.x = element_blank(), legend.position = "none") +
  labs(x = " ", title = 'divider= c("hspine", "hspine", "hspine")') +
  guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE))

m7 <- ggplot(data = NHANES) +
  geom_mosaic(
    aes(
      x = product(SleepHrsNight, Gender, AgeDecade),
      fill = factor(SleepHrsNight)
    ),
    na.rm = TRUE,
    divider = c("vspine", "vspine", "vspine")
  ) +
  theme(axis.text.x = element_blank(), legend.position = "none") +
  labs(x = " ", title = 'divider= c("vspine", "vspine", "vspine")') +
  guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE))

# 2 x 2 arrangement with one shared legend on the right.
grid_arrange_shared_legend(
  m4, m5, m6, m7,
  ncol = 2, nrow = 2, position = "right"
)

geom_mosaic: offset

offset: Set the space between the first spine

Adjusting the offset

# Plots comparing the offset argument of geom_mosaic().
# oo1/oo2 use the NHANES data; o1/o2 build the same comparison on the
# `happy` data. Only oo1 and oo2 are displayed at the end of this chunk.

# NHANES, no offset argument passed (the title labels it as 0.01).
oo1 <- ggplot(data = NHANES) +
  geom_mosaic(
    aes(
      weight = Weight,
      x = product(Age),
      fill = factor(SleepHrsNight)
    ),
    na.rm = TRUE
  ) +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5)) +
  labs(x = "Age", y = " ", title = " offset = 0.01") +
  guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE))

# happy data, no offset argument passed. The fill variable is `marital`, so
# the legend is titled accordingly (it was previously mislabelled
# "SleepHrsNight").
o1 <- ggplot(data = happy) +
  geom_mosaic(
    aes(weight = wtssall, x = product(age), fill = marital)
  ) +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5)) +
  labs(x = "Age", y = " ", title = " offset = 0.01") +
  guides(fill = guide_legend(title = "marital", reverse = TRUE))

# NHANES, offset = 0.
oo2 <- ggplot(data = NHANES) +
  geom_mosaic(
    aes(
      weight = Weight,
      x = product(Age),
      fill = factor(SleepHrsNight)
    ),
    offset = 0,
    na.rm = TRUE
  ) +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5)) +
  labs(x = "Age", y = " ", title = " offset = 0") +
  guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE))

# happy data, offset = 0; legend titled after the `marital` fill variable.
o2 <- ggplot(data = happy) +
  geom_mosaic(
    aes(weight = wtssall, x = product(age), fill = marital),
    offset = 0
  ) +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5)) +
  labs(x = "Age", y = " ", title = " offset = 0") +
  guides(fill = guide_legend(title = "marital", reverse = TRUE))

# Display the two NHANES plots side by side with one shared legend.
grid_arrange_shared_legend(
  oo1, oo2,
  nrow = 1, ncol = 2, position = "right"
)

Plotly

# Double-decker mosaic of SleepHrsNight, Gender and AgeDecade, built as the
# input for the plotly conversion below.
# na.rm is spelled TRUE rather than T: T is an ordinary variable that can be
# reassigned, whereas TRUE is a reserved word.
gg <- ggplot(data = NHANES) +
  geom_mosaic(
    aes(
      x = product(SleepHrsNight, Gender, AgeDecade),
      fill = factor(SleepHrsNight)
    ),
    na.rm = TRUE,
    divider = ddecker(),
    offset = 0.025
  )
# The plotly conversion is commented out for now:
# ggplotly(gg)