Visualize Sports Injury Data

2023-11-14

library(injurytools)
library(ggplot2)
library(dplyr)
library(gridExtra)
library(grid)
library(knitr)

Example data: we continue exploring the cohort of Liverpool Football Club male’s first team players over two consecutive seasons, 2017-2018 and 2018-2019, scrapped from https://www.transfermarkt.com/ website1.

A quick glance

gg_injphoto(injd, 
            title   = "Overview of injuries:\nLiverpool FC 1st male team during 2017-2018 and 2018-2019 seasons",
            by_date = "2 month", 
            fix     = TRUE) +
  
  ## plus some lines of ggplot2 code..
  xlab("Follow-up date") + ylab("Players") + labs(caption = "source: transfermarkt.com") +
  theme(plot.title = element_text(face = "bold", hjust = 0.5, size = 22),
        axis.text.x.bottom = element_text(size = 13, angle = 20, hjust = 1),
        axis.text.y.left   = element_text(size = 12),
        axis.title.x       = element_text(size = 20, face = "bold", vjust = -1),
        axis.title.y       = element_text(size = 20, face = "bold", vjust = 1.8),
        legend.text        = element_text(size = 20),
        plot.caption       = element_text(face = "italic", size = 12, colour = "gray10"))

Let’s count how many injuries (red crosses in the graph) occurred and how severe they were (length of the thick black line).

# warnings set to FALSE
injds        <- injsummary(injd) 
injds_perinj <- injsummary(injd, var_type_injury = "injury_type") 
# injds
Code for tidying up the tables
injds[["overall"]] |> 
  mutate(incidence_new = paste0(round(injincidence, 2), " (", round(injincidence_lower, 2), ",", round(injincidence_upper, 2), ")"),
         burden_new    = paste0(round(injburden, 2), " (", round(injburden_lower, 2), ",", round(injburden_upper, 2), ")")) |> 
  dplyr::select(1:2, 6, incidence_new, burden_new) |> 
  kable(col.names = c("N injuries", "N days lost", "Total expo", "Incidence (95% CI)", "Burden (95% CI)"),
        caption   = "Injury incidence and injury burden are reported as 100 player-matches",
        align     = "c")

injds_perinj[["overall"]] |> 
  mutate(incidence_new = paste0(round(injincidence, 2), " (", round(injincidence_lower, 2), ",", round(injincidence_upper, 2), ")"),
         burden_new    = paste0(round(injburden, 2), " (", round(injburden_lower, 2), ",", round(injburden_upper, 2), ")")) |> 
  dplyr::select(1:2, 4, 9, incidence_new, burden_new) |> 
  kable(col.names = c("Type of injury", "N injuries", "N days lost", "Total expo", "Incidence (95% CI)", "Burden (95% CI)"),
        caption   = "Injury incidence and injury burden are reported as 100 player-matches",
        align     = "c")

Overall

Injury incidence and injury burden are reported as 100 player-matches
N injuries N days lost Total expo Incidence (95% CI) Burden (95% CI)
82 2049 74690 9.88 (7.74,12.02) 246.9 (236.21,257.59)
Overall per type of injury
Injury incidence and injury burden are reported as 100 player-matches
Type of injury N injuries N days lost Total expo Incidence (95% CI) Burden (95% CI)
Bone 11 173 74690 1.33 (0.54,2.11) 20.85 (17.74,23.95)
Concussion 16 213 74690 1.93 (0.98,2.87) 25.67 (22.22,29.11)
Ligament 9 596 74690 1.08 (0.38,1.79) 71.82 (66.05,77.58)
Muscle 25 735 74690 3.01 (1.83,4.19) 88.57 (82.16,94.97)
Unknown 21 332 74690 2.53 (1.45,3.61) 40.01 (35.7,44.31)

Let’s plot the information shown in the second table in a risk matrix that displays injury incidence against injury burden.

# warnings set to FALSE
gg_injriskmatrix(injds_perinj, 
                 var_type_injury = "injury_type", 
                 title = "Risk matrix")
Code for further plot specifications
# warnings set to FALSE
palette <- c("#000000", "#E69F00", "#56B4E9", "#009E73",
             "#F0E442", "#0072B2", "#D55E00", "#CC79A7")
# source of the palette: http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/
theme3 <- theme(plot.title = element_text(face = "bold", hjust = 0.5, size = 20),
                axis.text.x.bottom = element_text(size = 20),
                axis.text.y.left = element_text(size = 20),
                axis.title.x = element_text(size = 15),
                axis.title.y = element_text(size = 15),
                legend.title = element_text(size = 15),
                legend.text = element_text(size = 15))

gg_injriskmatrix(injds_perinj, 
                 var_type_injury = "injury_type", 
                 title = "Risk matrix") +
  scale_fill_manual(name = "Type of injury",
                    values = palette[c(7:8, 2:3, 5)]) +
  guides(fill = guide_legend(override.aes = list(size = 5))) +
  theme3


Comparing injuries occurred in 17/18 vs. 18/19

We prepare two injd objects:

# warnings set to FALSE
injd1 <- cut_injd(injd, datef = 2017)
injd2 <- cut_injd(injd, date0 = 2018)
## Plot just for checking whether cut_injd() worked well
p1 <- gg_injphoto(injd1, fix = TRUE, by_date = "3 months")
p2 <- gg_injphoto(injd2, fix = TRUE, by_date = "3 months")
grid.arrange(p1, p2, ncol = 2)

Let’s compute injury summary statistics for each season.

# warnings set to FALSE
injds1 <- injsummary(injd1)
injds2 <- injsummary(injd2)
Code for tidying up the tables
## **Season 2017/2018**
injds1[["overall"]] |> 
  mutate(incidence_new = paste0(round(injincidence, 2), " (", round(injincidence_lower, 2), ",", round(injincidence_upper, 2), ")"),
         burden_new    = paste0(round(injburden, 2), " (", round(injburden_lower, 2), ",", round(injburden_upper, 2), ")")) |> 
  dplyr::select(1:2, 6, incidence_new, burden_new) |> 
  kable(col.names = c("N injuries", "N days lost", "Total expo", "Incidence (95% CI)", "Burden (95% CI)"),
        caption   = "Injury incidence and injury burden are reported as 100 player-matches",
        align     = "c")

## **Season 2018/2019**
injds2[["overall"]] |> 
  mutate(incidence_new = paste0(round(injincidence, 2), "  (", round(injincidence_lower, 2), ",", round(injincidence_upper, 2), ")"),
         burden_new    = paste0(round(injburden, 2), "  (", round(injburden_lower, 2), ",", round(injburden_upper, 2), ")")) |> 
  dplyr::select(1:2, 6, incidence_new, burden_new) |> 
  kable(col.names = c("N injuries", "N days lost", "Total expo", "Incidence (95% CI)", "Burden (95% CI)"),
        caption   = "Injury incidence and injury burden are reported as 100 player-matches",
        align     = "c")
Season 2017/2018
Injury incidence and injury burden are reported as 100 player-matches
N injuries N days lost Total expo Incidence (95% CI) Burden (95% CI)
26 1141 37364 6.26 (3.86,8.67) 274.84 (258.89,290.78)
Season 2018/2019
Injury incidence and injury burden are reported as 100 player-matches
N injuries N days lost Total expo Incidence (95% CI) Burden (95% CI)
56 908 37326 13.5 (9.97,17.04) 218.94 (204.7,233.18)


- Who were the most injured players? And the most severely affected?

Player-wise statistics can be extracted by injds2 <- injsummary(injd1); injds2[[1]] (or injds2[["playerwise"]]). Then, we plot them:

p11 <- gg_injbarplot(injds1)
p12 <- gg_injbarplot(injds1, type = "burden")
p21 <- gg_injbarplot(injds2)
p22 <- gg_injbarplot(injds2, type = "burden") 

# grid.arrange(p11, p21, p12, p22, nrow = 2)
Code for further plot specifications
theme2 <- theme(plot.title = element_text(face = "bold", hjust = 0.5, size = 26),
                axis.text.x.bottom = element_text(size = 18),
                axis.text.y.left = element_text(size = 13),
                axis.title.x = element_text(size = 11, vjust = 1),
                axis.title.y = element_text(size = 22, face = "bold", vjust = 1))

p11 <- p11 + 
  xlab("Injury incidence") + 
  ylab("Player-wise incidence (injuries per 100 player-match)") +
  ggtitle("2017/2018 season") +
  scale_y_continuous(limits = c(0, 80)) + ## same x axis 
  theme2 +
  theme(plot.margin = margin(0.2, 0.2, 0.2, 0.5, "cm"))
p12 <- p12 +
  xlab("Injury burden") + 
  ylab("Player-wise burden (days lost per 100 player-match)") +
  scale_y_continuous(limits = c(0, 6110)) + 
  theme2 +
  theme(plot.margin = margin(0.2, 0.2, 0.2, 0.65, "cm"))

p21 <- p21 + 
  ylab("Player-wise incidence (injuries per 100 player-match)") +
  ggtitle("2018/2019 season") +
  scale_y_continuous(limits = c(0, 80)) + 
  theme2 
p22 <- p22 +
  ylab("Player-wise burden (days lost per 100 player-match)") +
  scale_y_continuous(limits = c(0, 6110)) + 
  theme2

grid.arrange(p11, p21, p12, p22, nrow = 2)


- Which injuries were more frequent? And more burdensome?

# warnings set to FALSE
## Calculate summary statistics
injds1_perinj <- injsummary(injd1, var_type_injury = "injury_type")
injds2_perinj <- injsummary(injd2, var_type_injury = "injury_type")

## Plot
p1 <- gg_injriskmatrix(injds1_perinj, var_type_injury = "injury_type", 
                       title = "Season 2017/2018", add_contour = FALSE)
p2 <- gg_injriskmatrix(injds2_perinj, var_type_injury = "injury_type",
                       title = "Season 2018/2019", add_contour = FALSE)

# Print both plots side by side
# grid.arrange(p1, p2, nrow = 1)
Code for further plot specifications
palette <- c("#000000", "#E69F00", "#56B4E9", "#009E73",
             "#F0E442", "#0072B2", "#D55E00", "#CC79A7")
# source of the palette: http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/
theme3 <- theme(plot.title = element_text(face = "bold", hjust = 0.5, size = 20),
                axis.text.x.bottom = element_text(size = 18),
                axis.text.y.left = element_text(size = 18),
                axis.title.x = element_text(size = 18),
                axis.title.y = element_text(size = 18),
                legend.title = element_text(size = 15),
                legend.text = element_text(size = 15))

## Plot
p1 <- gg_injriskmatrix(injds1_perinj, var_type_injury = "injury_type", 
                       title = "Season 2017/2018", add_contour = T,
                       cont_max_x = 6, cont_max_y = 130, ## after checking the data
                       bins = 15) 
p2 <- gg_injriskmatrix(injds2_perinj, var_type_injury = "injury_type",
                       title = "Season 2018/2019", add_contour = T, 
                       cont_max_x = 6, cont_max_y = 130,
                       bins = 15)

p1 <- p1 +
  scale_x_continuous(limits = c(0, 5.5)) +
  scale_y_continuous(limits = c(0, 125)) + 
  scale_fill_manual(name = "Type of injury",
                    values = palette[c(8, 2:3, 5)]) + # get rid off the green (pos: 4) 
  guides(fill = guide_legend(override.aes = list(size = 5))) +
  theme3
p2 <- p2 +
  scale_x_continuous(limits = c(0, 5.5)) +
  scale_y_continuous(limits = c(0, 125)) + 
  scale_fill_manual(name = "Type of injury",
                    values = palette[c(7, 8, 2:3, 5)]) + # keep the same color coding
   guides(fill = guide_legend(override.aes = list(size = 5))) +
  theme3

grid.arrange(p1, p2, ncol = 2, 
             top = textGrob("Risk matrices", gp = gpar(fontsize = 26, font = 2))) ## for the main title


- How many players were injury free in each month?

We will plot polar area diagrams2.

gg_injprev_polar(injd, by = "monthly")
Code for further plot specifications
theme4 <- theme(plot.title = element_text(face = "bold", hjust = 0.5, size = 20),
                axis.text.x = element_text(size = 16),
                axis.text.y = element_text(size = 18),
                legend.title = element_text(size = 20),
                legend.text = element_text(size = 20),
                strip.text = element_text(size = 20))


gg_injprev_polar(injd, by = "monthly",
            title = "Proportion of injured and available\n players in each month") +
  scale_fill_manual(name = "Type of injury", 
                    values = c("seagreen3", "red3")) + 
  theme4

gg_injprev_polar(injd, by = "monthly", var_type_injury = "injury_type")
Code for further plot specifications
palette2 <- c("seagreen3", "#000000", "#E69F00", "#56B4E9", "#009E73",
             "#F0E442", "#0072B2", "#D55E00", "#CC79A7")
# source of the palette: http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/


gg_injprev_polar(injd, by = "monthly", 
            var_type_injury = "injury_type",
            title = "Proportion of injured and available\n players in each month according to the type of injury") +
  scale_fill_manual(name = "Type of injury", 
                    values = palette2[c(1, 8:9, 3:4, 6)]) + 
  theme4


  1. These data sets are provided for illustrative purposes. We warn that they might not be accurate and could potentially include discrepancies or incomplete information compared to what actually occurred.↩︎

  2. See the Note section in ?injprev() or have a look at this section in Estimate summary statistics vignette, to better understand what the proportions refer to.↩︎