ggpmisc
0.2.6library(ggplot2)
library(ggpmisc)
library(xts)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(lubridate)
try_data_frame()
Several different formats for storing time series data are used in R. Here we use in the examples objects of class ts
but several other classes are supported as try.xts()
is used internally. The first example is a quarterly series.
class(austres)
## [1] "ts"
austres.df <- try_data_frame(austres)
class(austres.df)
## [1] "data.frame"
lapply(austres.df, "class")
## $time
## [1] "POSIXct" "POSIXt"
##
## $V.austres
## [1] "numeric"
head(austres.df, 4)
## time V.austres
## 1 1971-04-01 13067.3
## 2 1971-07-01 13130.5
## 3 1971-10-01 13198.4
## 4 1972-01-01 13254.2
The next chunk demonstrates that numeric times are expressed as decimal years in the returned data frame.
austres.df <- try_data_frame(austres, as.numeric = TRUE)
lapply(austres.df, "class")
## $time
## [1] "numeric"
##
## $V.austres
## [1] "numeric"
head(austres.df, 4)
## time V.austres
## 1 1971.247 13067.3
## 2 1971.496 13130.5
## 3 1971.748 13198.4
## 4 1972.000 13254.2
This second example is for a series of yearly values.
class(lynx)
## [1] "ts"
lynx.df <- try_data_frame(lynx)
class(lynx.df)
## [1] "data.frame"
lapply(lynx.df, "class")
## $time
## [1] "POSIXct" "POSIXt"
##
## $V.lynx
## [1] "numeric"
head(lynx.df, 3)
## time V.lynx
## 1 1821-01-01 00:00:01 269
## 2 1822-01-01 00:00:01 321
## 3 1823-01-01 00:00:01 585
Above there is a small rounding error of 1 s for these old dates. We can correct this by rounding to year.
lynx.df <- try_data_frame(lynx, "year")
head(lynx.df, 3)
## time V.lynx
## 1 1821-01-01 269
## 2 1822-01-01 321
## 3 1823-01-01 585
In addition we can convert the POSIXct values into numeric values in calendar years plus a decimal fraction.
lynx_n.df <- try_data_frame(lynx, "year", as.numeric = TRUE)
lapply(lynx_n.df, "class")
## $time
## [1] "numeric"
##
## $V.lynx
## [1] "numeric"
head(lynx_n.df, 3)
## time V.lynx
## 1 1821 269
## 2 1822 321
## 3 1823 585
try_data_frame()
attempts to handle gracefully objects that are not time series.
try_data_frame(1:5)
## x
## 1 1
## 2 2
## 3 3
## 4 4
## 5 5
try_data_frame(letters[1:5])
## x
## 1 a
## 2 b
## 3 c
## 4 d
## 5 e
try_data_frame(factor(letters[1:5]))
## x
## 1 a
## 2 b
## 3 c
## 4 d
## 5 e
try_data_frame(list(x = rep(1,5), y = 1:5))
## x y
## 1 1 1
## 2 1 2
## 3 1 3
## 4 1 4
## 5 1 5
try_data_frame(data.frame(x = rep(1,5), y = 1:5))
## x y
## 1 1 1
## 2 1 2
## 3 1 3
## 4 1 4
## 5 1 5
try_data_frame(matrix(1:10, ncol = 2))
## V1 V2
## 1 1 6
## 2 2 7
## 3 3 8
## 4 4 9
## 5 5 10
stat_peaks()
and stat_valleys()
Using POSIXct for time
and the default formatting of labels.
ggplot(lynx.df, aes(time, V.lynx)) + geom_line() +
stat_peaks(colour = "red") +
stat_peaks(geom = "text", colour = "red", vjust = -0.5) +
ylim(-100, 7300)
Using numeric values for time
and the default formatting of labels.
ggplot(lynx_n.df, aes(time, V.lynx)) + geom_line() +
stat_peaks(colour = "red") +
stat_peaks(geom = "text", colour = "red", vjust = -0.5) +
ylim(-100, 7300)
Using POSIXct for time
but supplying a format string. In addition marking both peaks and valleys.
ggplot(lynx.df, aes(time, V.lynx)) + geom_line() +
stat_peaks(colour = "red") +
stat_peaks(geom = "text", colour = "red", vjust = -0.5, x.label.fmt = "%Y") +
stat_valleys(colour = "blue") +
stat_valleys(geom = "text", colour = "blue", vjust = 1.5, x.label.fmt = "%Y") +
ylim(-100, 7300)
Using numeric for time
but supplying a format string. In addition marking both peaks and valleys.
ggplot(lynx_n.df, aes(time, V.lynx)) + geom_line() +
stat_peaks(colour = "red") +
stat_peaks(geom = "text", colour = "red", vjust = -0.5, x.label.fmt = "%4.0f") +
stat_valleys(colour = "blue") +
stat_valleys(geom = "text", colour = "blue", vjust = 1.5, x.label.fmt = "%4.0f") +
ylim(-100, 7300)
Rotating the labels.
ggplot(lynx.df, aes(time, V.lynx)) + geom_line() +
stat_peaks(colour = "red") +
stat_peaks(geom = "text", colour = "red", angle = 66,
hjust = -0.1, x.label.fmt = "%Y") +
ylim(NA, 7800)
Using geom_rug
for the peaks and valleys.
ggplot(lynx.df, aes(time, V.lynx)) + geom_line() +
stat_peaks(colour = "red") +
stat_peaks(geom = "rug", colour = "red") +
stat_valleys(colour = "blue") +
stat_valleys(geom = "rug", colour = "blue")
stat_poly_eq()
We generate some artificial data.
set.seed(4321)
# generate artificial data
x <- 1:100
y <- (x + x^2 + x^3) + rnorm(length(x), mean = 0, sd = mean(x^3) / 4)
my.data <- data.frame(x, y, group = c("A", "B"), y2 = y * c(0.5,2))
First one example using defaults.
formula <- y ~ poly(x, 3, raw = TRUE)
ggplot(my.data, aes(x, y)) +
geom_point() +
geom_smooth(method = "lm", formula = formula) +
stat_poly_eq(formula = formula, parse = TRUE)
stat_poly_eq()
makes available three different labels in the returned data frame. One of these is used by default, but aes()
can be used to select a different one.
formula <- y ~ poly(x, 3, raw = TRUE)
ggplot(my.data, aes(x, y)) +
geom_point() +
geom_smooth(method = "lm", formula = formula) +
stat_poly_eq(aes(label = ..adj.rr.label..), formula = formula,
parse = TRUE)
formula <- y ~ poly(x, 3, raw = TRUE)
ggplot(my.data, aes(x, y)) +
geom_point() +
geom_smooth(method = "lm", formula = formula) +
stat_poly_eq(aes(label = ..eq.label..), formula = formula,
parse = TRUE)
Within aes()
it is possible to compute new labels based on those returned plus “arbitrary” text. The supplied labels are meant to be parsed into expressions, so any text added should be valid for a string that will be parsed.
formula <- y ~ poly(x, 3, raw = TRUE)
ggplot(my.data, aes(x, y)) +
geom_point() +
geom_smooth(method = "lm", formula = formula) +
stat_poly_eq(aes(label = paste(..eq.label.., ..adj.rr.label.., sep = "~~~~")),
formula = formula, parse = TRUE)
Two examples of removing and changing the lhs and/or rhs of the equation.
formula <- y ~ poly(x, 3, raw = TRUE)
ggplot(my.data, aes(x, y)) +
geom_point() +
geom_smooth(method = "lm", formula = formula) +
stat_poly_eq(aes(label = ..eq.label..),
eq.with.lhs = FALSE,
formula = formula, parse = TRUE)
formula <- y ~ poly(x, 3, raw = TRUE)
ggplot(my.data, aes(x, y)) +
geom_point() +
geom_smooth(method = "lm", formula = formula) +
stat_poly_eq(aes(label = ..eq.label..),
eq.with.lhs = "italic(hat(y))~`=`~",
formula = formula, parse = TRUE)
formula <- y ~ poly(x, 3, raw = TRUE)
ggplot(my.data, aes(x, y)) +
geom_point() +
geom_smooth(method = "lm", formula = formula) +
labs(x = expression(italic(z)), y = expression(italic(h)) ) +
stat_poly_eq(aes(label = ..eq.label..),
eq.with.lhs = "italic(h)~`=`~",
eq.x.rhs = "~italic(z)",
formula = formula, parse = TRUE)
A couple of additional examples of polynomials of different orders, and specified in different ways.
Higher order polynomial.
formula <- y ~ poly(x, 5, raw = TRUE)
ggplot(my.data, aes(x, y)) +
geom_point() +
geom_smooth(method = "lm", formula = formula) +
stat_poly_eq(aes(label = ..eq.label..), formula = formula, parse = TRUE)
Intercept forced to zero.
formula <- y ~ x + I(x^2) + I(x^3) - 1
ggplot(my.data, aes(x, y)) +
geom_point() +
geom_smooth(method = "lm", formula = formula) +
stat_poly_eq(aes(label = ..eq.label..), formula = formula,
parse = TRUE)
We give below several examples to demonstrate how other components of the ggplot
object affect the behaviour of this statistic.
Facets work as expected either with fixed or free scales. Although bellow we had to adjust the size of the font used for the equation.
formula <- y ~ poly(x, 3, raw = TRUE)
ggplot(my.data, aes(x, y2)) +
geom_point() +
geom_smooth(method = "lm", formula = formula) +
stat_poly_eq(aes(label = ..eq.label..), size = rel(2.8),
formula = formula, parse = TRUE) +
facet_wrap(~group)
formula <- y ~ poly(x, 3, raw = TRUE)
ggplot(my.data, aes(x, y2)) +
geom_point() +
geom_smooth(method = "lm", formula = formula) +
stat_poly_eq(aes(label = ..eq.label..), size = rel(2.8),
formula = formula, parse = TRUE) +
facet_wrap(~group, scales = "free_y")
Grouping, in this example using colour aesthetic also works as expected.
formula <- y ~ poly(x, 3, raw = TRUE)
ggplot(my.data, aes(x, y2, colour = group)) +
geom_point() +
geom_smooth(method = "lm", formula = formula) +
stat_poly_eq(aes(label = ..eq.label..), vjust = c(-8, 0),
formula = formula, parse = TRUE) +
theme_bw()
stat_debug()
These stats are very simple and simply echo to labels on the plot itself a summary of the data received by the compute functions.
In the absence of facets or groups we get just one label.
ggplot(my.data, aes(x, y)) + geom_point() + stat_debug_group()
ggplot(my.data, aes(x, y)) + geom_point() + stat_debug_panel()
In the case of grouping if the groups do not have very different centres in the x y space tweaking of positions can be needed to avoid overlaps.
ggplot(my.data, aes(x, y, colour = group)) + geom_point() +
stat_debug_group(vjust = c(-0.5,1.5))
Without facets, we still have only one panel.
ggplot(my.data, aes(x, y, colour = group)) + geom_point() +
stat_debug_panel()
The text would be similar, except for the column named after the aesthetic, for other aesthetics used for grouping, but the labels will visually differ only if the geom used for the debug stat is affected by them.
ggplot(my.data, aes(x, y, shape = group)) + geom_point() +
stat_debug_group(vjust = c(-0.5,1.5))