Linear Segmentation

John Mount

2019-01-02

In this example we fit a piecewise linear function to example data.
Please see here for a discussion of the methodology.

library("RcppDynProg")

plot <- requireNamespace("ggplot2", quietly = TRUE)
if(plot) {
  library("ggplot2")
}

set.seed(2018)
g <- 100
d <- data.frame(
  x = 0.05*(1:(3*g))) # ordered in x
d$y_ideal <- sin((0.3*d$x)^2)
d$y_observed <- d$y_ideal + 0.25*rnorm(length(d$y_ideal))



if(plot) {
  plt1 <- ggplot(data= d, aes(x = x)) + 
    geom_line(aes(y = y_ideal), linetype=2) +
    geom_point(aes(y = y_observed)) +
    ylab("y") +
    ggtitle("raw data", 
            subtitle = "dots: observed values, dashed line: unobserved true values")
  print(plt1)
}

x_cuts <- solve_for_partition(d$x, d$y_observed, penalty = 1)
print(x_cuts)
##        x       pred group  what
## 1   0.05 -0.1570880     1  left
## 2   4.65  1.1593754     1 right
## 3   4.70  1.0653666     2  left
## 4   6.95 -0.9770792     2 right
## 5   7.00 -1.2254925     3  left
## 6   9.20  0.8971391     3 right
## 7   9.25  1.3792437     4  left
## 8  11.10 -1.1542021     4 right
## 9  11.15 -1.0418353     5  left
## 10 12.50  1.1519490     5 right
## 11 12.55  1.3964906     6  left
## 12 13.75 -1.2045219     6 right
## 13 13.80 -1.3791405     7  left
## 14 15.00  1.0195679     7 right
d$estimate <- approx(x_cuts$x, x_cuts$pred, xout = d$x, method = "linear", rule = 2)$y
d$group <- as.character(findInterval(d$x, x_cuts[x_cuts$what=="left", "x"]))
print(sum((d$y_observed - d$y_ideal)^2))
## [1] 20.42462
print(sum((d$estimate - d$y_ideal)^2))
## [1] 3.536541
print(sum((d$estimate - d$y_observed)^2))
## [1] 20.53796
if(plot) {
  plt2 <- ggplot(data= d, aes(x = x)) + 
    geom_line(aes(y = y_ideal), linetype=2) +
    geom_point(aes(y = y_observed, color = group)) +
    geom_line(aes(y = estimate, color = group)) +
    ylab("y") +
    ggtitle("RcppDynProg piecewise linear estimate",
            subtitle = "dots: observed values, segments: observed group means, dashed line: unobserved true values") + 
    theme(legend.position = "none")
  print(plt2)
}