--- title: "Slides" execute: cache: true freeze: auto include: true echo: false warning: false number-sections: true --- ```{r} library(tidyverse) library(dplyr) library(ggplot2) library(survival) library(emmeans) library(foreign) library(gtsummary) library(gt) library(ggsurvfit) library(gridExtra) library(survminer) ``` ```{r} dat <- read.csv("./unos.txt", sep = "\t") names(dat) <- c("hla.match", "age.donor", "age.rec", "cold.isc", "death", "year", "sex", "tx.type", "follow.up") dat <- dat |> mutate( sex = factor(sex, levels = c(0,1), labels = c("Female","Male")), tx.type = factor(tx.type, levels = c(0,1), labels = c("Cadaveric","Living")), hla.match = factor(hla.match), year = factor(year) ) ``` ```{r} my.theme <- theme_minimal() + theme( legend.position = "bottom", plot.title = element_text(size = 17, face = "bold"), axis.title = element_text(size = 15), axis.text = element_text(size = 12), legend.text = element_text(size = 12), plot.margin = margin(20, 30, 20, 30) ) ``` ::: {.callout-warning} There are 25 repondents with `follow.up = 0`, 21 of which with `death = 0` and other 4 with `death = 1`. ```{r, echo=TRUE} dat[dat$follow.up == 0, ] |> group_by(death) |> summarise(n = n()) ``` Reasons to omit them, that is, have `dat <- dat[dat$follow.up > 0,]` at the very beginning: - If `death = 1`, transplant failed. They shouldn't be included in the research question "survival rate of a *successful* tranplant". - If `death = 0`, transplant was successful, but they were immediately removed from populatoin at risk, contributiong nothing to the analysis. ::: # Title # Table of Contents (?) # Introduction (M) # Method & Result ## Identify Predictors (M) - Data source - Study population ```{r} nrow(dat) ``` - Covariates - Outcome ```{r} names(dat) ``` ## Table (L) ```{r} dat |> select(sex, age.rec, age.donor, hla.match, cold.isc, tx.type) |> tbl_summary( by = tx.type, statistic = list( all_continuous() ~ "{median} ({p25}, {p75})" ), label = list( hla.match ~ "HLA matches, n(%)", age.donor ~ "Donor age, median (IQR)", age.rec ~ "Recipient age, median (IQR)", cold.isc ~ "Cold ischemic time (hours), median (IQR), ", sex ~ "Sex, n(%)" ), missing = "ifany" ) |> add_overall() |> modify_footnote(all_stat_cols() ~ NA) ``` # Elementary Analysis: Transplant Type (L) ## Transplant Type vs Sex `sex` are similar across `tx.type` ```{r, fig.width=7, fig.height=5} plot.tx.sex <- dat |> mutate(Overall = "Overall") |> pivot_longer( cols = c(tx.type, Overall), names_to = "Attribute", values_to = "Category" ) |> mutate( Attribute = recode( Attribute, "tx.type" = "Transplant Type" ) ) |> count(Attribute, Category, sex) |> group_by(Attribute, Category) |> mutate( total = sum(n), percent = n / total ) |> ungroup() |> ggplot(aes(x = Category, y = percent, fill = sex)) + geom_col(position = "dodge") + facet_grid( ~ Attribute, scales = "free_x", space = "free_x" ) + labs( title = "Percentage of Sex by Transplant Type", x = "Group", y = "Percentage", fill = NULL ) + scale_x_discrete( labels = c("Cadaveric" = "Deceased") ) + my.theme plot.tx.sex ``` ## Transplant Type vs Recipient Age ```{r, fig.width=7, fig.height=5} plot.tx.rec <- ggplot(data = dat) + geom_density(aes(x = age.rec, color = tx.type)) + geom_density( aes(x = age.rec, color = "Overall"), linewidth = 0.5, linetype = "dashed" ) + scale_color_manual( values = c( "Cadaveric" = "darkorange", "Living" = "darkgreen", "Overall" = "gray50" ), labels = c( "Cadaveric" = "Deceased" ) ) + labs( title = "Density of Recipient Age by Transplant Type", x = "Recipient Age", y = "Density", color = NULL ) + my.theme plot.tx.rec ``` # Elementary Analysis: Transplant Type (Cont.) ## Transplant Type vs HLA Match Living donors have less `hla.match` then cadaveric donors. ```{r, fig.width=7, fig.height=5} ggplot(data = dat) + geom_bar(aes(x = hla.match)) + facet_wrap(tx.type ~ ., ncol = 1) + labs( title = "Distribution of HLA Match by Transplant Type", x = "HLA match", y = "Count", color = NULL ) + my.theme ``` ## Transplant Type vs Donor Age ```{r, fig.width=7, fig.height=5} plot.tx.donor <- ggplot(data = dat) + geom_density(aes(x = age.donor, color = tx.type)) + geom_density( aes(x = age.donor, color = "Overall"), linewidth = 0.5, linetype = "dashed" ) + scale_color_manual( values = c( "Cadaveric" = "darkorange", "Living" = "darkgreen", "Overall" = "gray50" ), labels = c( "Cadaveric" = "Deceased" ) ) + labs( title = "Density of Donor Age by Transplant Type", x = "Recipient Age", y = "Density", color = NULL ) + my.theme plot.tx.donor ``` # Overall Kaplan-Meier Curve ```{r} km_all <- survfit(Surv(follow.up, death) ~ 1, data = dat) ``` ```{r, fig.width=7, fig.height=5} km_all |> ggsurvfit(type = "survival") + add_confidence_interval() + scale_y_continuous(limits = c(0, 1)) + labs( title = "Kaplan-Meier Curve for Overall Survival", x = "Years of Follow-up", y = "Survival Probability", ) + my.theme ``` ```{r} summary(km_all, times = c(0, 4, 8, 12)) ``` # Cox Model (with intervals?) # Cox Model: Transplant Type ```{r} cox.tx <- coxph(Surv(follow.up, death) ~ tx.type, data = dat) summary(cox.tx) ``` ```{r} km.tx <- survfit(Surv(follow.up, death) ~ tx.type, data = dat) ``` ```{r, fig.width=7, fig.height=5} km.tx |> ggsurvfit(type = "survival") + add_confidence_interval() + scale_color_discrete( labels = c("Deceased", "Living") ) + scale_fill_discrete( labels = c("Deceased", "Living") ) + labs( title = "Survival by Donor Type", x = "Years of Follow-up", y = "Survival Probability", color = "Donor Type", fill = "Donor Type" ) + my.theme ``` ```{r} summary(km.tx, times = c(0, 4, 8, 12)) ``` # Cox Model: Age Recipient (Continuous) ```{r} cox.age.rec <- coxph(Surv(follow.up, death) ~ age.rec, data = dat) summary(cox.age.rec) ``` ```{r} km.age.rec <- survfit(Surv(follow.up, death) ~ age.rec, data = dat) ``` ```{r, fig.width=7, fig.height=7} km.age.rec |> ggsurvfit(type = "survival") + scale_color_discrete( labels = seq(1, 18) ) + labs( title = "Survival by Recipient Age", x = "Years of Follow-up", y = "Survival Probability", color = "Recipient Age", ) + my.theme ```
View life table ```{r} summary(km.age.rec, times = c(0, 4, 8, 12)) ```
# Cox Model: Age Recipient (Categorical) ```{r} class.age <- function(age) { if (is.na(age)) return(NA) if (age < 2) { return("0-1") } else if (age < 6) { return("2-5") } else if (age < 11) { return("6-10") } else if (age < 19) { return("11-18") } } dat.age.rec.group <- dat dat.age.rec.group$age.rec.group = sapply(dat$age.rec, class.age) |> factor(levels = c( "0-1", "2-5", "6-10", "11-18" )) cox.age.rec.group <- coxph(Surv(follow.up, death) ~ age.rec.group, data = dat.age.rec.group) ``` ```{r, echo=TRUE} cox.age.rec.group ``` ```{r, echo=TRUE} eme.age.rec.group <- emmeans(cox.age.rec.group, pairwise ~ age.rec.group, type = "response") eme.age.rec.group ``` ```{r} km.age.rec.group <- survfit(Surv(follow.up, death) ~ age.rec.group, data = dat.age.rec.group) ``` ```{r, fig.width=7, fig.height=7} km.age.rec.group |> ggsurvfit(type = "survival") + add_confidence_interval() + scale_color_discrete( labels = c("0-1", "2-5", "6-10", "11-18") ) + scale_fill_discrete( labels = c("0-1", "2-5", "6-10", "11-18") ) + labs( title = "Survival by Recipient Age Group", x = "Years of Follow-up", y = "Survival Probability", color = "Recipient Age Group", fill = "Recipient Age Group", ) + my.theme ``` ```{r, fig.width=7, fig.height=7} eme.age.rec.group$contrast |> as.data.frame() |> select(contrast, ratio, SE, p.value) |> mutate( contrast = factor(contrast), log.ratio = log(ratio), SE.log = SE / ratio, low = exp(log.ratio - 1.96 * SE.log), up = exp(log.ratio + 1.96 * SE.log) ) |> ggplot(aes(y = reorder(contrast, ratio))) + geom_errorbarh(aes(xmin = low, xmax = up)) + geom_point(aes(x = ratio)) + geom_vline(xintercept = 1, linetype = "dashed") + scale_y_discrete( labels = function(x) { gsub("[()]", "", x = x) } ) + labs( x = "Hazard Ratio", y = "Recipient Age Group Comparison", title = "Pairwise Hazard Ratios by Recipient Age Groups" ) + my.theme ``` # Identifying Predictors TODO: write a more generic function ```{r} Waldtest <- function(cox.fit, i.betas){ q <- length(i.betas) coefs <- cox.fit$coefficients var <- cox.fit$var[i.betas, i.betas] Wald <- coefs[i.betas]%*%solve(var)%*%coefs[i.betas] p.value <- 1-pchisq(as.numeric(Wald), df = q) print(p.value) return(p.value) } ``` ```{r} vars <- c("hla.match", "age.donor", "age.rec", "cold.isc", "year", "sex", "tx.type") included <- vars == "tx.type" included <- c(FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, TRUE) included <- c(FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, TRUE) find.best.surv <- function(included) { print('---') test.vars <- vars[!included] p.values <- sapply(test.vars, function(test.var) { new.surv <- reformulate( termlabels = c(vars[included], test.var), response = "Surv(follow.up, death)" ) fit.tx <- coxph(new.surv, data = dat) Waldtest(fit.tx, sum(included)) }) if (sum(p.values > 0.05) == sum(included)) { print(3) return(included) } if (sum(included == TRUE) == length(included)) { print(2) return(included) } print(1) print(which.min(p.values)) print(included) print(p.values) print(which.min(p.values)) included[which.min(p.values)] <- TRUE print(included) # find.best.surv(included) } find.best.surv(included) ``` ```{r} # surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match # fit.cox.model(surv, 3) # surv <- Surv(follow.up, death) ~ tx.type + age.rec + age.donor # fit.cox.model(surv, 3) # surv <- Surv(follow.up, death) ~ tx.type + age.rec + cold.isc # fit.cox.model(surv, 3) # surv <- Surv(follow.up, death) ~ tx.type + age.rec + year # fit.cox.model(surv, 3) # surv <- Surv(follow.up, death) ~ tx.type + age.rec + sex # fit.cox.model(surv, 3) ``` --- # Old
## Death & Censored ```{r, echo=TRUE} dat |> group_by(death) |> summarise( n = n(), percentage = n() / nrow(dat) * 100 ) ``` Most of the patients (95.24%) were censored. ```{r} n.pop <- nrow(dat) dat.accum <- dat[dat$follow.up > 0,] |> select(follow.up, death) |> mutate(censored = ifelse(death == 1, 0, 1)) |> group_by(follow.up) |> summarize( death = sum(death), censored = sum(censored), .groups = "drop" ) |> arrange(follow.up) |> mutate( death.accum = cumsum(death), censored.accum = cumsum(censored), event.accum = death.accum + censored.accum, n.at.risk = n.pop - c(0, event.accum[-n()]) ) dat.accum |> pivot_longer( cols = c(death.accum, censored.accum, n.at.risk), names_to = "type", values_to = "accum", ) |> ggplot(aes(x = follow.up, y = accum, color = type)) + geom_step(linewidth = 1, direction = "hv") + my.theme ``` ```{r} dat[dat$death == 1,] |> ggplot(aes(x = follow.up)) + geom_histogram(bins = 50) ``` ```{r} dat[dat$follow.up == 0,] ``` ```{r} head(dat) ``` ## Cox Model - Age as a Continuous Variable ```{r, echo=TRUE} cox.age.rec <- coxph(Surv(follow.up, death) ~ age.rec, data = dat) cox.age.rec |> summary() ``` ## Cox Model - Age as a Categorical Variable ```{r, fig.width=10, fig.height=10} cox.final <- coxph(Surv(follow.up, death) ~ tx.type + age.rec + hla.match + age.donor + year, data = dat) cox.zph(cox.final) plot(cox.zph(cox.final)) ggcoxzph(cox.zph(cox.final), ggtheme = theme_minimal() + theme( legend.position = "bottom", plot.title = element_text(size = 17, face = "bold"), axis.title = element_text(size = 15), axis.text = element_text(size = 12), legend.text = element_text(size = 12), strip.text = element_blank(), plot.margin = margin(20, 30, 20, 30) )) ``` ## Table 1 (L) ### HLA match `hla.match` ```{r} dat$hla.match |> table(useNA = "always") ``` ```{r} ggplot(dat, aes(x = hla.match)) + geom_bar() + labs(x = "HLA match", y = "Count") + theme_minimal() ``` ```{r} ggplot(dat, aes(x = hla.match, fill = tx.type)) + geom_bar(position = "dodge") + labs(x = "HLA match", y = "Count") + theme_minimal() ``` ::: {.callout-warning} TODO: change to percentage within each `tx.type` ::: ::: {.callout-note} Living donors have less `hla.match` then cadaveric donors ::: ### Donor age `age.donor` ```{r} #| echo: true mean(dat$age.donor, na.rm = TRUE) median(dat$age.donor, na.rm = TRUE) ``` ```{r} ggplot(dat, aes(x = age.donor)) + geom_bar() + labs(x = "Donor Age", y = "Count") + theme_minimal() ``` ```{r, fig.width = 8, fig.height = 12} ggplot(dat, aes(x = age.donor)) + geom_bar() + labs(x = "Donor Age", y = "Count") + theme_minimal() + facet_grid(hla.match ~ .) ``` ```{r, fig.width= 8, fig.height= 12} ggplot(dat, aes(x = age.donor, color = tx.type)) + geom_density() + labs(x = "Donor Age", y = "Density") + theme_minimal() + facet_grid(hla.match ~ .) ``` ```{r} ggplot(dat, aes(x = age.donor, color = hla.match)) + geom_density() + labs(x = "Donor Age", y = "Count") + theme_minimal() ``` ```{r} ggplot(dat, aes(x = as.factor(tx.type), y = age.donor)) + geom_boxplot() + theme_minimal() ``` ::: {.callout-note} There is age difference between different `tx.type` ::: ### Recipient Age `age.rec` ```{r} #| echo: true mean(dat$age.rec, na.rm = TRUE) median(dat$age.rec, na.rm = TRUE) ``` ```{r} ggplot(dat, aes(x = age.rec)) + geom_bar() + labs(x = "Recipient Age", y = "Count") + theme_minimal() ``` ```{r} ggplot(dat, aes(x = as.factor(age.rec), y = age.donor)) + geom_boxplot() + theme_minimal() ``` ```{r} ggplot(dat, aes(x = age.rec, fill = tx.type)) + geom_bar(position = "dodge") + theme_minimal() ``` ```{r} ggplot(dat, aes(x = age.rec, color = hla.match)) + geom_density() + theme_minimal() ``` ```{r} ggplot(dat, aes(x = as.factor(age.rec), y = cold.isc)) + geom_boxplot() + theme_minimal() ``` ### Cold Ischemia Time `cold.isc` ```{r} #| echo: true mean(dat$cold.isc, na.rm = TRUE) median(dat$cold.isc, na.rm = TRUE) ``` ```{r} ggplot(dat, aes(x = cold.isc)) + geom_density() + labs(x = "Cold Ischemia Time (hours)", y = "Probability Density") + theme_minimal() ``` ```{r} ggplot(dat, aes(x = cold.isc, color = tx.type, group = tx.type)) + geom_density() + labs(x = "Cold Ischemia Time (hours)", y = "Probability Density") + theme_minimal() ``` ```{r} ggplot(dat, aes(x = cold.isc)) + geom_density() + theme_minimal() + facet_grid(tx.type ~ .) ``` ### Transplant Type `tx.type` ```{r} dat$tx.type |> table(useNA = "always") ``` ```{r} #| echo: true dat$tx.type |> table() ``` ```{r} ggplot(dat, aes(x = tx.type)) + geom_bar() + labs(x = "Transplant Type", y = "Count") + theme_minimal() ``` ### Year `year` ```{r} dat$year |> table() ``` ```{r} ggplot(dat, aes(x = year)) + geom_bar() + labs(x = "Year", y = "Count") + theme_minimal() ``` ```{r} ggplot(dat, aes(x = year, y = follow.up)) + geom_boxplot() ``` ```{r} ggplot(dat, aes(x = year, fill = tx.type)) + geom_bar(position = "dodge") ``` ```{r} ggplot(dat, aes(x = year, y = age.rec)) + geom_boxplot() ``` ```{r} ggplot(dat, aes(x = year, y = age.donor)) + geom_boxplot() ``` ## Kaplan-Meier ### Overall (L) ```{r} km_all <- survfit(Surv(follow.up, death) ~ 1, data = dat) summary(km_all, times = c(4, 8, 12)) ``` ```{r} km_all |> ggsurvfit(type = "survival") + add_confidence_interval() + scale_y_continuous(limits = c(0, 1), labels = scales::label_percent()) + labs( x = "Years of Follow-up", y = "Overall Survival Probability" ) + theme_minimal() ``` ### `tx.type` (M) ```{r, fig.width=7, fig.height=5} km.tx <- survfit(Surv(follow.up, death) ~ tx.type, data = dat) km.tx |> ggsurvfit(type = "survival") + add_confidence_interval() + scale_x_continuous( breaks = seq(0, 20, by = 4) ) + scale_y_continuous( limits = c(0, 1), breaks = seq(0, 1, by = 0.2), labels = scales::label_number(accuracy = 0.1) ) + labs( x = "Years of Follow-up", y = "Overall Survival Probability" ) + theme_minimal() + theme( legend.position = "bottom", plot.title = element_text(size = 17, face = "bold"), axis.title = element_text(size = 15), axis.text = element_text(size = 10), legend.text = element_text(size = 12), plot.margin = margin(20, 30, 20, 30) ) ``` ## Cox Model ```{r} get.life.table <- function(dat, time.intervals) { n.pop <- nrow(dat) dat |> recode.dat(time.intervals) |> group_by(fu.interval) |> summarize( n.censored = sum(.data$death == 0), n.event = sum(.data$death), ) |> ungroup() |> calculate.hazard(n.pop) } get.life.table.by.groups <- function(dat, time.intervals, grps) { grps |> lapply(function(grp) { dat |> get.life.table.by.group(time.intervals, grp) |> mutate( grp.name = grp, grp.value = pick(1)[[1]] ) |> select(-1) }) |> bind_rows() } get.life.table.by.group <- function(dat, time.intervals, grp) { dat |> recode.dat(time.intervals) |> group_by(fu.interval, .data[[grp]]) |> summarize( n.censored = sum(.data$death == 0), n.event = sum(.data$death), .groups = "keep" ) |> ungroup(fu.interval) |> group_modify(function(df.sub, grp) { grp.name <- names(grp) grp.value <- grp[[1]] n.pop <- (dat[[grp.name]] == grp.value) |> sum() calculate.hazard(df.sub, n.pop) }) |> ungroup() } calculate.hazard <- function(life.table, n.pop) { n.removed <- life.table$n.event + life.table$n.censored n.removed.accum <- c(0, cumsum(n.removed)[-length(n.removed)]) life.table |> mutate( n.at.risk = n.pop - n.removed.accum, # TODO: how to account for censored? How do we adjust for uneven interval? hazard.rate = n.event / n.at.risk ) } recode.dat <- function(dat, time.intervals) { df <- dat[dat$follow.up <= sum(time.intervals), ] time.points <- cumsum(time.intervals) df$fu.interval <- sapply(df$follow.up, function(time) { time.points[time <= time.points][1] }) df } ``` ### `death` Distribution (?) ```{r} # dat |> # mutate( # accum.death = cumsum(death), # accum.censored = if_else(death == 1, 0, 1) |> cumsum() # ) |> # ggplot() + # geom_pont(aes(x = follow.up, y = accum.death, color = death)) ``` ```{r} ``` ```{r} plot.death = dat[dat$death == 1,] |> ggplot(aes(x = follow.up)) + geom_histogram(bins = 50) plot.death ``` ### Overall (L) ::: {.callout-note} skip overall, jump directly to `tx.type` ::: ```{r} time.intervals <- c(1/3, 1/3, 1/3, 1, 1, 1, 1) get.life.table(dat, time.intervals) ``` ### `tx.type` (M) ### `age` (continuous) (L) ### `age` (categorical) (M) ### Full model (L) ```{r} m1 <- coxph(Surv(follow.up, death) ~ hla.match + tx.type, data = dat) summary(m1) m2 <- coxph(Surv(follow.up, death) ~ hla.match * tx.type, data = dat) summary(m2) anova(m1, m2, test = "LRT") ``` ### Discussion: include `year` or not? ## Assumption Testing ## Why Survival Analysis (L) Motivation: study the distribution of time to event $T$. Example: time of death after kidney transplant. ```{r} ex <- data.frame( id = c(1, 2, 3, 4, 5), transplant = c(2000, 2000, 2001, 2003, 2004), death = c(2005, 2009, 2005, 2004, 2016) ) ex_table_1 <- ex |> gt() |> cols_label( id = "ID", transplant = "Transplant", death = "Death" ) %>% cols_align(align = "center", columns = everything()) ex_plot_real_time <- ggplot(ex) + geom_segment(aes(x = transplant, xend = death, y = factor(id), yend = factor(id)), color = "grey50", linewidth = 1) + geom_point(aes(x = transplant, y = factor(id), color = "trans"), size = 3) + geom_point(aes(x = death, y = factor(id), color = "death"), size = 3) + scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D")) + scale_y_discrete(limits = rev) + labs(x = "Year", y = "Subject ID", color = "Event") + theme_minimal() ``` ```{r} ex_table_1 ex_plot_real_time ``` --- We then calculate time to death for each respondents. With this, all we need is to fit a linear model `t ~ X` or `log(t) ~ X` ```{r} ex_t <- ex |> mutate( t = death - transplant ) ex_table_2 <- ex_t |> gt() |> cols_label( id = "ID", transplant = "Transplant", death = "Death", t = "Time to death" ) |> cols_align(align = "center", columns = everything()) ex_plot_uniform_time <- ggplot(ex_t) + geom_segment(aes(x = 0, xend = t, y = factor(id), yend = factor(id)), color = "grey50", linewidth = 1) + geom_point(aes(x = 0, y = factor(id), color = "trans"), size = 3) + geom_point(aes(x = t, y = factor(id), color = "death"), size = 3) + scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D")) + scale_y_discrete(limits = rev) + labs(x = "Year", y = "Subject ID", color = "Event") + theme_minimal() ``` ```{r} ex_table_2 ex_plot_uniform_time ``` --- What if we do not know when exactly some respondents die? Scenario 1: the study ends at the year 2008? ```{r} ex_t_c <- ex_t |> mutate( death_censored = if_else(death <= 2008, death, 2008), death_censored_txt = if_else(death <= 2008, as.character(death), "> 2008"), status = if_else(death <= 2008, 1, 0), t_1 = death_censored - transplant, t_1_txt = if_else(death <= 2008, as.character(t_1), paste(">", as.character(t_1))) ) ``` ```{r} ex_table_3 <- ex_t_c |> select(id, transplant, death_censored_txt, t_1_txt) |> gt() |> cols_label( id = "ID", transplant = "Transplant", death_censored_txt = "Death", t_1_txt = "Time to death" ) |> cols_align(align = "center", columns = everything()) ex_plot_real_time_1 <- ggplot(ex_t_c) + geom_segment(aes(x = transplant, xend = death_censored, y = factor(id), yend = factor(id)), color = "grey50", linewidth = 1) + geom_point(aes(x = transplant, y = factor(id), color = "trans"), size = 3) + geom_point(aes(x = death_censored, y = factor(id), color = if_else(status == 1, "death", "censored")), size = 3) + scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D", "censored" = "orange")) + scale_y_discrete(limits = rev) + labs(x = "Year", y = "Subject ID", color = "Event") + xlim(2000, 2016) + geom_vline(xintercept = 2008, linetype = "dashed", color = "orange", linewidth = 0.8) + theme_minimal() ex_plot_uniform_time_1 <- ggplot(ex_t_c) + geom_segment(aes(x = 0, xend = t_1, y = factor(id), yend = factor(id)), color = "grey50", linewidth = 1) + geom_point(aes(x = 0, y = factor(id), color = "trans"), size = 3) + geom_point(aes(x = t_1, y = factor(id), color = if_else(status == 1, "death", "censored")), size = 3) + scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D", "censored" = "orange")) + scale_y_discrete(limits = rev) + labs(x = "Year", y = "Subject ID", color = "Event") + theme_minimal() ``` ```{r} ex_table_3 ex_plot_real_time_1 ex_plot_uniform_time_1 ``` **Right Censoring**: only observe the event (death) if it occurs before a certain time (2008). --- Scenario 2: respondent 3 move away; loss follow up ```{r} ex_alt <- ex_t_c ex_alt$death_censored_txt[3] <- "> 2005" ex_alt$status[3] <- 0 ex_alt$t_1_txt[3] <- "> 4" ex_table_4 <- ex_alt |> select(id, transplant, death_censored_txt, t_1_txt) |> gt() |> cols_label( id = "ID", transplant = "Transplant", death_censored_txt = "Death", t_1_txt = "Time to death" ) |> cols_align(align = "center", columns = everything()) ex_plot_real_time_2 <- ggplot(ex_alt) + geom_segment(aes(x = transplant, xend = death_censored, y = factor(id), yend = factor(id)), color = "grey50", linewidth = 1) + geom_point(aes(x = transplant, y = factor(id), color = "trans"), size = 3) + geom_point(aes(x = death_censored, y = factor(id), color = if_else(status == 1, "death", "censored")), size = 3) + scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D", "censored" = "orange")) + scale_y_discrete(limits = rev) + labs(x = "Year", y = "Subject ID", color = "Event") + xlim(2000, 2016) + geom_vline(xintercept = 2008, linetype = "dashed", color = "orange", linewidth = 0.8) + theme_minimal() ex_plot_uniform_time_2 <- ggplot(ex_alt) + geom_segment(aes(x = 0, xend = t_1, y = factor(id), yend = factor(id)), color = "grey50", linewidth = 1) + geom_point(aes(x = 0, y = factor(id), color = "trans"), size = 3) + geom_point(aes(x = t_1, y = factor(id), color = if_else(status == 1, "death", "censored")), size = 3) + scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D", "censored" = "orange")) + scale_y_discrete(limits = rev) + labs(x = "Year", y = "Subject ID", color = "Event") + theme_minimal() ``` ```{r} ex_table_4 ex_plot_real_time_2 ex_plot_uniform_time_2 ``` --- How many patients are right censored? ```{r} dat |> mutate(Overall = "Overall") |> pivot_longer( cols = c(Overall, sex, tx.type), names_to = "Attribute", values_to = "Category" ) |> count(Attribute, Category, death) |> ggplot(aes(x = Category, y = n, fill = factor(death))) + geom_col(position = "dodge") + facet_wrap(~ Attribute, scales = "free_x") + labs(x = "Group", y = "Count", fill = "Death Status") + theme_minimal() ``` ## Challenges in Survival Analysis (L) - **Right Censoring**: only observe the event if it occurs before a certain time. - **Left Censoring**: event has occurred prior to the start of a research - Follow up every 3 years. - Event has occurred -> event happened sometime before follow up. - **Left Truncation**: delayed entry; respondents are included only if they survived long enough. - Start follow up with patients 100 days after the transplant - Patients dies within 100 day wouldn't be included in the dataset - **Right Truncation**: respondents are included only if they have already experienced the event. - Retrospective analysis from deceased patients between 1990 and 2000. - Patients not dead before 2000 are not included in the dataset.