diff --git a/report.Rmd b/report.Rmd index f5f6edd..f51fe1b 100644 --- a/report.Rmd +++ b/report.Rmd @@ -546,23 +546,20 @@ recode.age <- function(dat, ages) { dat.age.rec <- dat[!is.na(dat$age.rec), ] class.age <- function(age) { if (age < 2) { - return("0,1") + return("0-1") } else if (age < 6) { - return("2,3,4,5") + return("2-5") } else if (age < 11) { - return("6,7,8,9,10") + return("6-10") } else if (age < 19) { return("11-18") } } -``` - -```{r} dat.age.rec$age.group <- sapply(dat.age.rec$age.rec, class.age) |> factor(levels = c( - "0,1", - "2,3,4,5", - "6,7,8,9,10", + "0-1", + "2-5", + "6-10", "11-18" )) @@ -574,9 +571,6 @@ cox.age.rec <- coxph(Surv(follow.up, death) ~ age.group, data = dat.age.rec) emmeans(cox.age.rec, pairwise ~ age.group, type = "response") ``` -```{r} -cox.age.rec |> ggsurvfit() -``` # Exercise 7 @@ -705,6 +699,7 @@ H1: PH-assumption doesn't hold # different cox model!! cox.final <- coxph(Surv(follow.up, death) ~ tx.type + age.rec + hla.match + age.donor, data = dat) cox.zph(cox.final) +cox.final ``` @@ -715,6 +710,11 @@ A non-random pattern or slope in a plot of scaled residuals against time indicat plot(cox.zph(cox.final)) ``` +```{r, fig.width=10, fig.height=10} +plot(cox.zph(cox.final)) +``` + + NOTES: diff --git a/slides.Rmd b/slides.Rmd index 871e462..024b4b4 100644 --- a/slides.Rmd +++ b/slides.Rmd @@ -5,6 +5,7 @@ execute: freeze: auto include: true echo: false + warning: false number-sections: true --- @@ -18,8 +19,12 @@ library(foreign) library(gtsummary) library(gt) library(ggsurvfit) +library(gridExtra) +library(survminer) +library(viridis) ``` + ```{r} dat <- read.csv("./unos.txt", sep = "\t") names(dat) <- c("hla.match", "age.donor", "age.rec", "cold.isc", "death", @@ -33,306 +38,565 @@ dat <- dat |> ) ``` -# Introduction: research question - -## Survival after transplantation (M) - -## Identify Predictors (M) - -## Why Survival Analysis (L) - -Motivation: study the distribution of time to event $T$. - -Example: time of death after kidney transplant. - - ```{r} -ex <- data.frame( - id = c(1, 2, 3, 4, 5), - transplant = c(2000, 2000, 2001, 2003, 2004), - death = c(2005, 2009, 2005, 2004, 2016) -) - -ex_table_1 <- ex |> - gt() |> - cols_label( - id = "ID", - transplant = "Transplant", - death = "Death" - ) %>% - cols_align(align = "center", columns = everything()) - -ex_plot_real_time <- ggplot(ex) + - geom_segment(aes(x = transplant, xend = death, y = factor(id), - yend = factor(id)), color = "grey50", linewidth = 1) + - geom_point(aes(x = transplant, y = factor(id), color = "trans"), size = 3) + - geom_point(aes(x = death, y = factor(id), color = "death"), size = 3) + - scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D")) + - scale_y_discrete(limits = rev) + - labs(x = "Year", y = "Subject ID", color = "Event") + - theme_minimal() -``` - -```{r} -ex_table_1 -ex_plot_real_time -``` - ---- - -We then calculate time to death for each respondents. - -With this, all we need is to fit a linear model `t ~ X` or `log(t) ~ X` - -```{r} -ex_t <- ex |> - mutate( - t = death - transplant - ) - -ex_table_2 <- ex_t |> - gt() |> - cols_label( - id = "ID", - transplant = "Transplant", - death = "Death", - t = "Time to death" - ) |> - cols_align(align = "center", columns = everything()) - -ex_plot_uniform_time <- ggplot(ex_t) + - geom_segment(aes(x = 0, xend = t, y = factor(id), - yend = factor(id)), color = "grey50", linewidth = 1) + - geom_point(aes(x = 0, y = factor(id), color = "trans"), size = 3) + - geom_point(aes(x = t, y = factor(id), color = "death"), size = 3) + - scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D")) + - scale_y_discrete(limits = rev) + - labs(x = "Year", y = "Subject ID", color = "Event") + - theme_minimal() - -``` - - -```{r} -ex_table_2 -ex_plot_uniform_time -``` - ---- - -What if we do not know when exactly some respondents die? - -Scenario 1: the study ends at the year 2008? - -```{r} -ex_t_c <- ex_t |> - mutate( - death_censored = if_else(death <= 2008, death, 2008), - death_censored_txt = if_else(death <= 2008, as.character(death), "> 2008"), - status = if_else(death <= 2008, 1, 0), - t_1 = death_censored - transplant, - t_1_txt = if_else(death <= 2008, as.character(t_1), - paste(">", as.character(t_1))) +my.theme <- theme_minimal() + + theme( + legend.position = "bottom", + plot.title = element_text(size = 17, face = "bold"), + axis.title = element_text(size = 15), + axis.text = element_text(size = 12), + legend.text = element_text(size = 12), + plot.margin = margin(20, 30, 20, 30) ) ``` -```{r} -ex_table_3 <- ex_t_c |> - select(id, transplant, death_censored_txt, t_1_txt) |> - gt() |> - cols_label( - id = "ID", - transplant = "Transplant", - death_censored_txt = "Death", - t_1_txt = "Time to death" - ) |> - cols_align(align = "center", columns = everything()) +::: {.callout-warning} +There are 25 repondents with `follow.up = 0`, 21 of which with `death = 0` and +other 4 with `death = 1`. -ex_plot_real_time_1 <- ggplot(ex_t_c) + - geom_segment(aes(x = transplant, xend = death_censored, y = factor(id), - yend = factor(id)), color = "grey50", linewidth = 1) + - geom_point(aes(x = transplant, y = factor(id), color = "trans"), size = 3) + - geom_point(aes(x = death_censored, y = factor(id), color = - if_else(status == 1, "death", "censored")), size = 3) + - scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D", - "censored" = "orange")) + - scale_y_discrete(limits = rev) + - labs(x = "Year", y = "Subject ID", color = "Event") + - xlim(2000, 2016) + - geom_vline(xintercept = 2008, linetype = "dashed", color = "orange", - linewidth = 0.8) + - theme_minimal() - - -ex_plot_uniform_time_1 <- ggplot(ex_t_c) + - geom_segment(aes(x = 0, xend = t_1, y = factor(id), - yend = factor(id)), color = "grey50", linewidth = 1) + - geom_point(aes(x = 0, y = factor(id), color = "trans"), size = 3) + - geom_point(aes(x = t_1, y = factor(id), color = - if_else(status == 1, "death", "censored")), size = 3) + - scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D", - "censored" = "orange")) + - scale_y_discrete(limits = rev) + - labs(x = "Year", y = "Subject ID", color = "Event") + - theme_minimal() +```{r, echo=TRUE} +dat[dat$follow.up == 0, ] |> + group_by(death) |> + summarise(n = n()) ``` +Reasons to omit them, that is, have `dat <- dat[dat$follow.up > 0,]` at the +very beginning: -```{r} -ex_table_3 -ex_plot_real_time_1 -ex_plot_uniform_time_1 -``` +- If `death = 1`, transplant failed. They shouldn't be included in the research + question "survival rate of a *successful* tranplant". +- If `death = 0`, transplant was successful, but they were immediately removed + from populatoin at risk, contributiong nothing to the analysis. +::: -**Right Censoring**: only observe the event (death) if it occurs before a -certain time (2008). +# Title ---- +# Table of Contents (?) -Scenario 2: respondent 3 move away; loss follow up - -```{r} -ex_alt <- ex_t_c -ex_alt$death_censored_txt[3] <- "> 2005" -ex_alt$status[3] <- 0 -ex_alt$t_1_txt[3] <- "> 4" - - -ex_table_4 <- ex_alt |> - select(id, transplant, death_censored_txt, t_1_txt) |> - gt() |> - cols_label( - id = "ID", - transplant = "Transplant", - death_censored_txt = "Death", - t_1_txt = "Time to death" - ) |> - cols_align(align = "center", columns = everything()) - -ex_plot_real_time_2 <- ggplot(ex_alt) + - geom_segment(aes(x = transplant, xend = death_censored, y = factor(id), - yend = factor(id)), color = "grey50", linewidth = 1) + - geom_point(aes(x = transplant, y = factor(id), color = "trans"), size = 3) + - geom_point(aes(x = death_censored, y = factor(id), color = - if_else(status == 1, "death", "censored")), size = 3) + - scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D", - "censored" = "orange")) + - scale_y_discrete(limits = rev) + - labs(x = "Year", y = "Subject ID", color = "Event") + - xlim(2000, 2016) + - geom_vline(xintercept = 2008, linetype = "dashed", color = "orange", - linewidth = 0.8) + - theme_minimal() - - -ex_plot_uniform_time_2 <- ggplot(ex_alt) + - geom_segment(aes(x = 0, xend = t_1, y = factor(id), - yend = factor(id)), color = "grey50", linewidth = 1) + - geom_point(aes(x = 0, y = factor(id), color = "trans"), size = 3) + - geom_point(aes(x = t_1, y = factor(id), color = - if_else(status == 1, "death", "censored")), size = 3) + - scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D", - "censored" = "orange")) + - scale_y_discrete(limits = rev) + - labs(x = "Year", y = "Subject ID", color = "Event") + - theme_minimal() -``` - -```{r} -ex_table_4 -ex_plot_real_time_2 -ex_plot_uniform_time_2 -``` - ---- - -How many patients are right censored? - -```{r} -dat |> - mutate(Overall = "Overall") |> - pivot_longer( - cols = c(Overall, sex, tx.type), - names_to = "Attribute", - values_to = "Category" - ) |> - count(Attribute, Category, death) |> - ggplot(aes(x = Category, y = n, fill = factor(death))) + - geom_col(position = "dodge") + - facet_wrap(~ Attribute, scales = "free_x") + - labs(x = "Group", y = "Count", fill = "Death Status") + - theme_minimal() -``` - -## Challenges in Survival Analysis (L) - -- **Right Censoring**: only observe the event if it occurs before a certain - time. -- **Left Censoring**: event has occurred prior to the start of a research - - Follow up every 3 years. - - Event has occurred -> event happened sometime before follow up. -- **Left Truncation**: delayed entry; respondents are included only if they - survived long enough. - - Start follow up with patients 100 days after the transplant - - Patients dies within 100 day wouldn't be included in the dataset -- **Right Truncation**: respondents are included only if they have already - experienced the event. - - Retrospective analysis from deceased patients between 1990 and 2000. - - Patients not dead before 2000 are not included in the dataset. +# Introduction (M) # Method & Result -## Explain Dataset (M) +## Identify Predictors (M) -::: {.callout-note} -# Things to highlight in figures +- Data source +- Study population +```{r} +nrow(dat) +``` +- Covariates +- Outcome +```{r} +names(dat) +``` -- sex and recipient age are similar across `tx.type` +## Table (L) -- Living donors have less `hla.match` then cadaveric donors +```{r} +dat |> + select(sex, age.rec, age.donor, hla.match, cold.isc, tx.type) |> + tbl_summary( + by = tx.type, + statistic = list( + all_continuous() ~ "{median} ({p25}, {p75})" + ), + label = list( + hla.match ~ "HLA matches, n(%)", + age.donor ~ "Donor age, median (IQR)", + age.rec ~ "Recipient age, median (IQR)", + cold.isc ~ "Cold ischemic time (hours), median (IQR), ", + sex ~ "Sex, n(%)" + ), + missing = "ifany" + ) |> + add_overall() |> + modify_footnote(all_stat_cols() ~ NA) +``` -- There is age difference between different `tx.type` -::: +# Elementary Analysis: Transplant Type (L) +## Transplant Type vs Sex -### `sex` ~ `tx.tpye` - -::: {.callout-note} `sex` are similar across `tx.type` -::: ```{r, fig.width=7, fig.height=5} -dat |> - count(tx.type, sex) |> - group_by(tx.type) |> +plot.tx.sex <- dat |> + mutate(Overall = "Overall") |> + pivot_longer( + cols = c(tx.type, Overall), + names_to = "Attribute", + values_to = "Category" + ) |> + mutate( + Attribute = recode( + Attribute, + "tx.type" = "Transplant Type" + ) + ) |> + count(Attribute, Category, sex) |> + group_by(Attribute, Category) |> mutate( total = sum(n), percent = n / total ) |> ungroup() |> - ggplot() + - geom_col(aes(x = tx.type, y = percent, fill = sex), position = "dodge") + - labs( - title = "Percentage of Sex in Transplant Type", - y = "Percentage", - x = "Transplant Type", - fill = NULL - - ) + - scale_x_discrete( - labels = c("Cadaveric" = "Deceased"), + ggplot(aes(x = Category, y = percent, fill = sex)) + + geom_col(position = "dodge") + + facet_grid( + ~ Attribute, + scales = "free_x", + space = "free_x" ) + - theme_minimal() + - theme( - legend.position = "bottom", - plot.title = element_text(size = 17, face = "bold"), - axis.title = element_text(size = 15), - axis.text = element_text(size = 10), - legend.text = element_text(size = 12), - plot.margin = margin(20, 30, 20, 30) + labs( + title = "Percentage of Sex by Transplant Type", + x = "Group", + y = "Percentage", + fill = NULL + ) + + scale_x_discrete( + labels = c("Cadaveric" = "Deceased") + ) + + my.theme +plot.tx.sex +``` + +## Transplant Type vs Recipient Age + +```{r, fig.width=7, fig.height=5} +plot.tx.rec <- ggplot(data = dat) + + geom_density(aes(x = age.rec, color = tx.type)) + + geom_density( + aes(x = age.rec, color = "Overall"), + linewidth = 0.5, + linetype = "dashed" + ) + + scale_color_manual( + values = c( + "Cadaveric" = "darkorange", + "Living" = "darkgreen", + "Overall" = "gray50" + ), + labels = c( + "Cadaveric" = "Deceased" + ) + ) + + labs( + title = "Density of Recipient Age by Transplant Type", + x = "Recipient Age", + y = "Density", + color = NULL + ) + + my.theme +plot.tx.rec +``` + + +# Elementary Analysis: Transplant Type (Cont.) + +## Transplant Type vs HLA Match + +Living donors have less `hla.match` then cadaveric donors. + +```{r, fig.width=7, fig.height=5} +ggplot(data = dat) + + geom_bar(aes(x = hla.match)) + + facet_wrap(tx.type ~ ., ncol = 1) + + labs( + title = "Distribution of HLA Match by Transplant Type", + x = "HLA match", + y = "Count", + color = NULL + ) + + my.theme + +``` + +## Transplant Type vs Donor Age + +```{r, fig.width=7, fig.height=5} +plot.tx.donor <- ggplot(data = dat) + + geom_density(aes(x = age.donor, color = tx.type)) + + geom_density( + aes(x = age.donor, color = "Overall"), + linewidth = 0.5, + linetype = "dashed" + ) + + scale_color_manual( + values = c( + "Cadaveric" = "darkorange", + "Living" = "darkgreen", + "Overall" = "gray50" + ), + labels = c( + "Cadaveric" = "Deceased" + ) + ) + + labs( + title = "Density of Donor Age by Transplant Type", + x = "Recipient Age", + y = "Density", + color = NULL + ) + + my.theme + +plot.tx.donor +``` + +# Overall Kaplan-Meier Curve + +```{r} +km_all <- survfit(Surv(follow.up, death) ~ 1, data = dat) +``` + +```{r, fig.width=7, fig.height=5} +km_all |> + ggsurvfit(type = "survival") + + add_confidence_interval() + + scale_y_continuous(limits = c(0, 1)) + + labs( + title = "Kaplan-Meier Curve for Overall Survival", + x = "Years of Follow-up", + y = "Survival Probability", + ) + + my.theme +``` + +```{r} +summary(km_all, times = c(0, 4, 8, 12)) +``` + +# Cox Model (with intervals?) + +# Cox Model: Transplant Type + +```{r} +cox.tx <- coxph(Surv(follow.up, death) ~ tx.type, data = dat) +summary(cox.tx) +``` + +```{r} +km.tx <- survfit(Surv(follow.up, death) ~ tx.type, data = dat) +``` + +```{r, fig.width=7, fig.height=5} +km.tx |> + ggsurvfit(type = "survival") + + add_confidence_interval() + + scale_color_discrete( + labels = c("Deceased", "Living") + ) + + scale_fill_discrete( + labels = c("Deceased", "Living") + ) + + labs( + title = "Survival by Donor Type", + x = "Years of Follow-up", + y = "Survival Probability", + color = "Donor Type", + fill = "Donor Type" + ) + + my.theme +``` + +```{r} +summary(km.tx, times = c(0, 4, 8, 12)) +``` + +# Cox Model: Age Recipient (Continuous) + +```{r} +cox.age.rec <- coxph(Surv(follow.up, death) ~ age.rec, data = dat) +summary(cox.age.rec) +``` + +```{r} +km.age.rec <- survfit(Surv(follow.up, death) ~ age.rec, data = dat) +``` + +```{r, fig.width=7, fig.height=7} +km.age.rec |> + ggsurvfit(type = "survival") + + scale_color_discrete( + labels = seq(1, 18) + ) + + labs( + title = "Survival by Recipient Age", + x = "Years of Follow-up", + y = "Survival Probability", + color = "Recipient Age", + ) + + my.theme +``` + +
+View life table + +```{r} +summary(km.age.rec, times = c(0, 4, 8, 12)) +``` +
+ + +# Cox Model: Age Recipient (Categorical) + +```{r} +class.age <- function(age) { + if (is.na(age)) return(NA) + if (age < 2) { + return("0-1") + } else if (age < 6) { + return("2-5") + } else if (age < 11) { + return("6-10") + } else if (age < 19) { + return("11-18") + } +} + +dat.age.rec.group <- dat +dat.age.rec.group$age.rec.group = sapply(dat$age.rec, class.age) |> + factor(levels = c( + "0-1", + "2-5", + "6-10", + "11-18" + )) +cox.age.rec.group <- coxph(Surv(follow.up, death) ~ age.rec.group, + data = dat.age.rec.group) +``` + + +```{r, echo=TRUE} +cox.age.rec.group +``` + + + + +```{r, echo=TRUE} +eme.age.rec.group <- emmeans(cox.age.rec.group, pairwise ~ age.rec.group, + type = "response") +eme.age.rec.group +``` + + +```{r} +km.age.rec.group <- survfit(Surv(follow.up, death) ~ age.rec.group, + data = dat.age.rec.group) +``` + +```{r, fig.width=7, fig.height=7} +km.age.rec.group |> + ggsurvfit(type = "survival") + + add_confidence_interval() + + scale_color_discrete( + labels = c("0-1", "2-5", "6-10", "11-18") + ) + + scale_fill_discrete( + labels = c("0-1", "2-5", "6-10", "11-18") + ) + + labs( + title = "Survival by Recipient Age Group", + x = "Years of Follow-up", + y = "Survival Probability", + color = "Recipient Age Group", + fill = "Recipient Age Group", + ) + + my.theme +``` + +```{r, fig.width=7, fig.height=7} +eme.age.rec.group$contrast |> + as.data.frame() |> + select(contrast, ratio, SE, p.value) |> + mutate( + contrast = factor(contrast), + log.ratio = log(ratio), + SE.log = SE / ratio, + low = exp(log.ratio - 1.96 * SE.log), + up = exp(log.ratio + 1.96 * SE.log) + ) |> + ggplot(aes(y = reorder(contrast, ratio))) + + geom_errorbarh(aes(xmin = low, xmax = up)) + + geom_point(aes(x = ratio)) + + geom_vline(xintercept = 1, linetype = "dashed") + + scale_y_discrete( + labels = function(x) { + gsub("[()]", "", x = x) + } + ) + + labs( + x = "Hazard Ratio", + y = "Recipient Age Group Comparison", + title = "Pairwise Hazard Ratios by Recipient Age Groups" + ) + + my.theme +``` + +# Identifying Predictors + +TODO: write a more generic function + +```{r} +Waldtest <- function(cox.fit, i.betas){ + q <- length(i.betas) + coefs <- cox.fit$coefficients + var <- cox.fit$var[i.betas, i.betas] + Wald <- coefs[i.betas]%*%solve(var)%*%coefs[i.betas] + p.value <- 1-pchisq(as.numeric(Wald), df = q) + print(p.value) + return(p.value) +} +``` + +```{r} +vars <- c("hla.match", "age.donor", "age.rec", "cold.isc", "year", "sex", + "tx.type") +included <- vars == "tx.type" +included <- c(FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, TRUE) +included <- c(FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, TRUE) + +find.best.surv <- function(included) { + print('---') + test.vars <- vars[!included] + p.values <- sapply(test.vars, function(test.var) { + new.surv <- reformulate( + termlabels = c(vars[included], test.var), + response = "Surv(follow.up, death)" + ) + fit.tx <- coxph(new.surv, data = dat) + Waldtest(fit.tx, sum(included)) + }) + if (sum(p.values > 0.05) == sum(included)) { + print(3) + return(included) + } + if (sum(included == TRUE) == length(included)) { + print(2) + return(included) + } + print(1) + print(which.min(p.values)) + print(included) + print(p.values) + print(which.min(p.values)) + included[which.min(p.values)] <- TRUE + print(included) + # find.best.surv(included) +} + + +find.best.surv(included) + +``` + +```{r} +surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match +fit.cox.model(surv, 3) +surv <- Surv(follow.up, death) ~ tx.type + age.rec + age.donor +fit.cox.model(surv, 3) +surv <- Surv(follow.up, death) ~ tx.type + age.rec + cold.isc +fit.cox.model(surv, 3) +surv <- Surv(follow.up, death) ~ tx.type + age.rec + year +fit.cox.model(surv, 3) +surv <- Surv(follow.up, death) ~ tx.type + age.rec + sex +fit.cox.model(surv, 3) +``` + + +``` + +--- + + +# Old + +
+ +## Death & Censored + + +```{r, echo=TRUE} +dat |> + group_by(death) |> + summarise( + n = n(), + percentage = n() / nrow(dat) * 100 ) +``` + +Most of the patients (95.24%) were censored. + +```{r} +n.pop <- nrow(dat) +dat.accum <- dat[dat$follow.up > 0,] |> + select(follow.up, death) |> + mutate(censored = ifelse(death == 1, 0, 1)) |> + group_by(follow.up) |> + summarize( + death = sum(death), + censored = sum(censored), + .groups = "drop" + ) |> + arrange(follow.up) |> + mutate( + death.accum = cumsum(death), + censored.accum = cumsum(censored), + event.accum = death.accum + censored.accum, + n.at.risk = n.pop - c(0, event.accum[-n()]) + ) +dat.accum |> + pivot_longer( + cols = c(death.accum, censored.accum, n.at.risk), + names_to = "type", + values_to = "accum", + ) |> + ggplot(aes(x = follow.up, y = accum, color = type)) + + geom_step(linewidth = 1, direction = "hv") + + my.theme +``` + +```{r} +dat[dat$death == 1,] |> + ggplot(aes(x = follow.up)) + + geom_histogram(bins = 50) +``` +```{r} +dat[dat$follow.up == 0,] +``` + +```{r} +head(dat) +``` + +## Cox Model - Age as a Continuous Variable + +```{r, echo=TRUE} +cox.age.rec <- coxph(Surv(follow.up, death) ~ age.rec, data = dat) +cox.age.rec |> summary() +``` + +## Cox Model - Age as a Categorical Variable + + +```{r, fig.width=10, fig.height=10} +cox.final <- coxph(Surv(follow.up, death) ~ tx.type + age.rec + hla.match + age.donor + year, data = dat) +cox.zph(cox.final) +plot(cox.zph(cox.final)) + +ggcoxzph(cox.zph(cox.final), + ggtheme = theme_minimal() + + theme( + legend.position = "bottom", + plot.title = element_text(size = 17, face = "bold"), + axis.title = element_text(size = 15), + axis.text = element_text(size = 12), + legend.text = element_text(size = 12), + strip.text = element_blank(), + plot.margin = margin(20, 30, 20, 30) + )) ``` ## Table 1 (L) @@ -724,5 +988,246 @@ anova(m1, m2, test = "LRT") ## Assumption Testing -# Conclusion (M + L) + + ## Why Survival Analysis (L) + +Motivation: study the distribution of time to event $T$. + +Example: time of death after kidney transplant. + + +```{r} +ex <- data.frame( + id = c(1, 2, 3, 4, 5), + transplant = c(2000, 2000, 2001, 2003, 2004), + death = c(2005, 2009, 2005, 2004, 2016) +) + +ex_table_1 <- ex |> + gt() |> + cols_label( + id = "ID", + transplant = "Transplant", + death = "Death" + ) %>% + cols_align(align = "center", columns = everything()) + +ex_plot_real_time <- ggplot(ex) + + geom_segment(aes(x = transplant, xend = death, y = factor(id), + yend = factor(id)), color = "grey50", linewidth = 1) + + geom_point(aes(x = transplant, y = factor(id), color = "trans"), size = 3) + + geom_point(aes(x = death, y = factor(id), color = "death"), size = 3) + + scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D")) + + scale_y_discrete(limits = rev) + + labs(x = "Year", y = "Subject ID", color = "Event") + + theme_minimal() +``` + +```{r} +ex_table_1 +ex_plot_real_time +``` + +--- + +We then calculate time to death for each respondents. + +With this, all we need is to fit a linear model `t ~ X` or `log(t) ~ X` + +```{r} +ex_t <- ex |> + mutate( + t = death - transplant + ) + +ex_table_2 <- ex_t |> + gt() |> + cols_label( + id = "ID", + transplant = "Transplant", + death = "Death", + t = "Time to death" + ) |> + cols_align(align = "center", columns = everything()) + +ex_plot_uniform_time <- ggplot(ex_t) + + geom_segment(aes(x = 0, xend = t, y = factor(id), + yend = factor(id)), color = "grey50", linewidth = 1) + + geom_point(aes(x = 0, y = factor(id), color = "trans"), size = 3) + + geom_point(aes(x = t, y = factor(id), color = "death"), size = 3) + + scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D")) + + scale_y_discrete(limits = rev) + + labs(x = "Year", y = "Subject ID", color = "Event") + + theme_minimal() + +``` + + +```{r} +ex_table_2 +ex_plot_uniform_time +``` + +--- + +What if we do not know when exactly some respondents die? + +Scenario 1: the study ends at the year 2008? + +```{r} +ex_t_c <- ex_t |> + mutate( + death_censored = if_else(death <= 2008, death, 2008), + death_censored_txt = if_else(death <= 2008, as.character(death), "> 2008"), + status = if_else(death <= 2008, 1, 0), + t_1 = death_censored - transplant, + t_1_txt = if_else(death <= 2008, as.character(t_1), + paste(">", as.character(t_1))) + ) +``` + +```{r} +ex_table_3 <- ex_t_c |> + select(id, transplant, death_censored_txt, t_1_txt) |> + gt() |> + cols_label( + id = "ID", + transplant = "Transplant", + death_censored_txt = "Death", + t_1_txt = "Time to death" + ) |> + cols_align(align = "center", columns = everything()) + +ex_plot_real_time_1 <- ggplot(ex_t_c) + + geom_segment(aes(x = transplant, xend = death_censored, y = factor(id), + yend = factor(id)), color = "grey50", linewidth = 1) + + geom_point(aes(x = transplant, y = factor(id), color = "trans"), size = 3) + + geom_point(aes(x = death_censored, y = factor(id), color = + if_else(status == 1, "death", "censored")), size = 3) + + scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D", + "censored" = "orange")) + + scale_y_discrete(limits = rev) + + labs(x = "Year", y = "Subject ID", color = "Event") + + xlim(2000, 2016) + + geom_vline(xintercept = 2008, linetype = "dashed", color = "orange", + linewidth = 0.8) + + theme_minimal() + + +ex_plot_uniform_time_1 <- ggplot(ex_t_c) + + geom_segment(aes(x = 0, xend = t_1, y = factor(id), + yend = factor(id)), color = "grey50", linewidth = 1) + + geom_point(aes(x = 0, y = factor(id), color = "trans"), size = 3) + + geom_point(aes(x = t_1, y = factor(id), color = + if_else(status == 1, "death", "censored")), size = 3) + + scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D", + "censored" = "orange")) + + scale_y_discrete(limits = rev) + + labs(x = "Year", y = "Subject ID", color = "Event") + + theme_minimal() +``` + + +```{r} +ex_table_3 +ex_plot_real_time_1 +ex_plot_uniform_time_1 +``` + +**Right Censoring**: only observe the event (death) if it occurs before a +certain time (2008). + +--- + +Scenario 2: respondent 3 move away; loss follow up + +```{r} +ex_alt <- ex_t_c +ex_alt$death_censored_txt[3] <- "> 2005" +ex_alt$status[3] <- 0 +ex_alt$t_1_txt[3] <- "> 4" + + +ex_table_4 <- ex_alt |> + select(id, transplant, death_censored_txt, t_1_txt) |> + gt() |> + cols_label( + id = "ID", + transplant = "Transplant", + death_censored_txt = "Death", + t_1_txt = "Time to death" + ) |> + cols_align(align = "center", columns = everything()) + +ex_plot_real_time_2 <- ggplot(ex_alt) + + geom_segment(aes(x = transplant, xend = death_censored, y = factor(id), + yend = factor(id)), color = "grey50", linewidth = 1) + + geom_point(aes(x = transplant, y = factor(id), color = "trans"), size = 3) + + geom_point(aes(x = death_censored, y = factor(id), color = + if_else(status == 1, "death", "censored")), size = 3) + + scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D", + "censored" = "orange")) + + scale_y_discrete(limits = rev) + + labs(x = "Year", y = "Subject ID", color = "Event") + + xlim(2000, 2016) + + geom_vline(xintercept = 2008, linetype = "dashed", color = "orange", + linewidth = 0.8) + + theme_minimal() + + +ex_plot_uniform_time_2 <- ggplot(ex_alt) + + geom_segment(aes(x = 0, xend = t_1, y = factor(id), + yend = factor(id)), color = "grey50", linewidth = 1) + + geom_point(aes(x = 0, y = factor(id), color = "trans"), size = 3) + + geom_point(aes(x = t_1, y = factor(id), color = + if_else(status == 1, "death", "censored")), size = 3) + + scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D", + "censored" = "orange")) + + scale_y_discrete(limits = rev) + + labs(x = "Year", y = "Subject ID", color = "Event") + + theme_minimal() +``` + +```{r} +ex_table_4 +ex_plot_real_time_2 +ex_plot_uniform_time_2 +``` + +--- + +How many patients are right censored? + +```{r} +dat |> + mutate(Overall = "Overall") |> + pivot_longer( + cols = c(Overall, sex, tx.type), + names_to = "Attribute", + values_to = "Category" + ) |> + count(Attribute, Category, death) |> + ggplot(aes(x = Category, y = n, fill = factor(death))) + + geom_col(position = "dodge") + + facet_wrap(~ Attribute, scales = "free_x") + + labs(x = "Group", y = "Count", fill = "Death Status") + + theme_minimal() +``` + +## Challenges in Survival Analysis (L) + +- **Right Censoring**: only observe the event if it occurs before a certain + time. +- **Left Censoring**: event has occurred prior to the start of a research + - Follow up every 3 years. + - Event has occurred -> event happened sometime before follow up. +- **Left Truncation**: delayed entry; respondents are included only if they + survived long enough. + - Start follow up with patients 100 days after the transplant + - Patients dies within 100 day wouldn't be included in the dataset +- **Right Truncation**: respondents are included only if they have already + experienced the event. + - Retrospective analysis from deceased patients between 1990 and 2000. + - Patients not dead before 2000 are not included in the dataset. +