diff --git a/publish.sh b/publish.sh index f319fd5..04c97b0 100644 --- a/publish.sh +++ b/publish.sh @@ -19,6 +19,6 @@ git add -A git commit -m "Deploy static site" # Push this folder as the gh-pages branch of chmee/kidney -git push -f git@github.com:chmee/kidney.git main:gh-pages +git push -f git@github.com:chmlee/kidney.git master:gh-pages cd - diff --git a/report.Rmd b/report.Rmd index 89e97dc..f51fe1b 100644 --- a/report.Rmd +++ b/report.Rmd @@ -14,6 +14,7 @@ library(survival) library(emmeans) library(foreign) library(gtsummary) +library(ggsurvfit) ``` ```{r} @@ -114,6 +115,29 @@ table.split <- dat.table1 |> table.split ``` +```{r} +table.split <- dat.table1 |> + select(sex, age.rec, age.donor, hla.match, cold.isc, tx.type) |> + tbl_summary( + by = tx.type, + statistic = list( + all_continuous() ~ "{median} ({p25}, {p75})" + ), + label = list( + hla.match ~ "HLA matches, n(%)", + age.donor ~ "Donor age, median (IQR)", + age.rec ~ "Recipient age, median (IQR)", + cold.isc ~ "Cold ischemic time (hours), median (IQR), ", + sex ~ "Sex, n(%)" + ), + missing = "ifany" + ) |> + add_overall() |> + modify_footnote(all_stat_cols() ~ NA) + +table.split +``` + Calculate median follow-up (reverse Kaplan-Meier method), with 95% CI ```{r} @@ -367,9 +391,6 @@ recode.dat <- function(dat, time.intervals) { df } - - - ``` ```{r} @@ -525,29 +546,25 @@ recode.age <- function(dat, ages) { dat.age.rec <- dat[!is.na(dat$age.rec), ] class.age <- function(age) { if (age < 2) { - return("0,1") + return("0-1") } else if (age < 6) { - return("2,3,4,5") + return("2-5") } else if (age < 11) { - return("6,7,8,9,10") + return("6-10") } else if (age < 19) { return("11-18") } } -``` - -```{r} dat.age.rec$age.group <- sapply(dat.age.rec$age.rec, class.age) |> factor(levels = c( - "0,1", - "2,3,4,5", - "6,7,8,9,10", + "0-1", + "2-5", + "6-10", "11-18" )) cox.age.rec <- coxph(Surv(follow.up, death) ~ age.group, data = dat.age.rec) -cox.age.rec |> summary() ``` ```{r} @@ -555,6 +572,7 @@ emmeans(cox.age.rec, pairwise ~ age.group, type = "response") ``` + # Exercise 7 Fit a multivariate Cox model by using other predictors and @@ -681,6 +699,7 @@ H1: PH-assumption doesn't hold # different cox model!! cox.final <- coxph(Surv(follow.up, death) ~ tx.type + age.rec + hla.match + age.donor, data = dat) cox.zph(cox.final) +cox.final ``` @@ -691,6 +710,11 @@ A non-random pattern or slope in a plot of scaled residuals against time indicat plot(cox.zph(cox.final)) ``` +```{r, fig.width=10, fig.height=10} +plot(cox.zph(cox.final)) +``` + + NOTES: diff --git a/slides.Rmd b/slides.Rmd new file mode 100644 index 0000000..024b4b4 --- /dev/null +++ b/slides.Rmd @@ -0,0 +1,1233 @@ +--- +title: Slides +execute: + cache: true + freeze: auto + include: true + echo: false + warning: false +number-sections: true +--- + +```{r} +library(tidyverse) +library(dplyr) +library(ggplot2) +library(survival) +library(emmeans) +library(foreign) +library(gtsummary) +library(gt) +library(ggsurvfit) +library(gridExtra) +library(survminer) +library(viridis) +``` + + +```{r} +dat <- read.csv("./unos.txt", sep = "\t") +names(dat) <- c("hla.match", "age.donor", "age.rec", "cold.isc", "death", + "year", "sex", "tx.type", "follow.up") +dat <- dat |> + mutate( + sex = factor(sex, levels = c(0,1), labels = c("Female","Male")), + tx.type = factor(tx.type, levels = c(0,1), labels = c("Cadaveric","Living")), + hla.match = factor(hla.match), + year = factor(year) + ) +``` + +```{r} +my.theme <- theme_minimal() + + theme( + legend.position = "bottom", + plot.title = element_text(size = 17, face = "bold"), + axis.title = element_text(size = 15), + axis.text = element_text(size = 12), + legend.text = element_text(size = 12), + plot.margin = margin(20, 30, 20, 30) + ) +``` + +::: {.callout-warning} +There are 25 repondents with `follow.up = 0`, 21 of which with `death = 0` and +other 4 with `death = 1`. + +```{r, echo=TRUE} +dat[dat$follow.up == 0, ] |> + group_by(death) |> + summarise(n = n()) +``` + +Reasons to omit them, that is, have `dat <- dat[dat$follow.up > 0,]` at the +very beginning: + +- If `death = 1`, transplant failed. They shouldn't be included in the research + question "survival rate of a *successful* tranplant". +- If `death = 0`, transplant was successful, but they were immediately removed + from populatoin at risk, contributiong nothing to the analysis. +::: + +# Title + +# Table of Contents (?) + +# Introduction (M) + +# Method & Result + +## Identify Predictors (M) + +- Data source +- Study population +```{r} +nrow(dat) +``` +- Covariates +- Outcome +```{r} +names(dat) +``` + +## Table (L) + +```{r} +dat |> + select(sex, age.rec, age.donor, hla.match, cold.isc, tx.type) |> + tbl_summary( + by = tx.type, + statistic = list( + all_continuous() ~ "{median} ({p25}, {p75})" + ), + label = list( + hla.match ~ "HLA matches, n(%)", + age.donor ~ "Donor age, median (IQR)", + age.rec ~ "Recipient age, median (IQR)", + cold.isc ~ "Cold ischemic time (hours), median (IQR), ", + sex ~ "Sex, n(%)" + ), + missing = "ifany" + ) |> + add_overall() |> + modify_footnote(all_stat_cols() ~ NA) +``` + +# Elementary Analysis: Transplant Type (L) + +## Transplant Type vs Sex + +`sex` are similar across `tx.type` + +```{r, fig.width=7, fig.height=5} +plot.tx.sex <- dat |> + mutate(Overall = "Overall") |> + pivot_longer( + cols = c(tx.type, Overall), + names_to = "Attribute", + values_to = "Category" + ) |> + mutate( + Attribute = recode( + Attribute, + "tx.type" = "Transplant Type" + ) + ) |> + count(Attribute, Category, sex) |> + group_by(Attribute, Category) |> + mutate( + total = sum(n), + percent = n / total + ) |> + ungroup() |> + ggplot(aes(x = Category, y = percent, fill = sex)) + + geom_col(position = "dodge") + + facet_grid( + ~ Attribute, + scales = "free_x", + space = "free_x" + ) + + labs( + title = "Percentage of Sex by Transplant Type", + x = "Group", + y = "Percentage", + fill = NULL + ) + + scale_x_discrete( + labels = c("Cadaveric" = "Deceased") + ) + + my.theme +plot.tx.sex +``` + +## Transplant Type vs Recipient Age + +```{r, fig.width=7, fig.height=5} +plot.tx.rec <- ggplot(data = dat) + + geom_density(aes(x = age.rec, color = tx.type)) + + geom_density( + aes(x = age.rec, color = "Overall"), + linewidth = 0.5, + linetype = "dashed" + ) + + scale_color_manual( + values = c( + "Cadaveric" = "darkorange", + "Living" = "darkgreen", + "Overall" = "gray50" + ), + labels = c( + "Cadaveric" = "Deceased" + ) + ) + + labs( + title = "Density of Recipient Age by Transplant Type", + x = "Recipient Age", + y = "Density", + color = NULL + ) + + my.theme +plot.tx.rec +``` + + +# Elementary Analysis: Transplant Type (Cont.) + +## Transplant Type vs HLA Match + +Living donors have less `hla.match` then cadaveric donors. + +```{r, fig.width=7, fig.height=5} +ggplot(data = dat) + + geom_bar(aes(x = hla.match)) + + facet_wrap(tx.type ~ ., ncol = 1) + + labs( + title = "Distribution of HLA Match by Transplant Type", + x = "HLA match", + y = "Count", + color = NULL + ) + + my.theme + +``` + +## Transplant Type vs Donor Age + +```{r, fig.width=7, fig.height=5} +plot.tx.donor <- ggplot(data = dat) + + geom_density(aes(x = age.donor, color = tx.type)) + + geom_density( + aes(x = age.donor, color = "Overall"), + linewidth = 0.5, + linetype = "dashed" + ) + + scale_color_manual( + values = c( + "Cadaveric" = "darkorange", + "Living" = "darkgreen", + "Overall" = "gray50" + ), + labels = c( + "Cadaveric" = "Deceased" + ) + ) + + labs( + title = "Density of Donor Age by Transplant Type", + x = "Recipient Age", + y = "Density", + color = NULL + ) + + my.theme + +plot.tx.donor +``` + +# Overall Kaplan-Meier Curve + +```{r} +km_all <- survfit(Surv(follow.up, death) ~ 1, data = dat) +``` + +```{r, fig.width=7, fig.height=5} +km_all |> + ggsurvfit(type = "survival") + + add_confidence_interval() + + scale_y_continuous(limits = c(0, 1)) + + labs( + title = "Kaplan-Meier Curve for Overall Survival", + x = "Years of Follow-up", + y = "Survival Probability", + ) + + my.theme +``` + +```{r} +summary(km_all, times = c(0, 4, 8, 12)) +``` + +# Cox Model (with intervals?) + +# Cox Model: Transplant Type + +```{r} +cox.tx <- coxph(Surv(follow.up, death) ~ tx.type, data = dat) +summary(cox.tx) +``` + +```{r} +km.tx <- survfit(Surv(follow.up, death) ~ tx.type, data = dat) +``` + +```{r, fig.width=7, fig.height=5} +km.tx |> + ggsurvfit(type = "survival") + + add_confidence_interval() + + scale_color_discrete( + labels = c("Deceased", "Living") + ) + + scale_fill_discrete( + labels = c("Deceased", "Living") + ) + + labs( + title = "Survival by Donor Type", + x = "Years of Follow-up", + y = "Survival Probability", + color = "Donor Type", + fill = "Donor Type" + ) + + my.theme +``` + +```{r} +summary(km.tx, times = c(0, 4, 8, 12)) +``` + +# Cox Model: Age Recipient (Continuous) + +```{r} +cox.age.rec <- coxph(Surv(follow.up, death) ~ age.rec, data = dat) +summary(cox.age.rec) +``` + +```{r} +km.age.rec <- survfit(Surv(follow.up, death) ~ age.rec, data = dat) +``` + +```{r, fig.width=7, fig.height=7} +km.age.rec |> + ggsurvfit(type = "survival") + + scale_color_discrete( + labels = seq(1, 18) + ) + + labs( + title = "Survival by Recipient Age", + x = "Years of Follow-up", + y = "Survival Probability", + color = "Recipient Age", + ) + + my.theme +``` + +
+View life table + +```{r} +summary(km.age.rec, times = c(0, 4, 8, 12)) +``` +
+ + +# Cox Model: Age Recipient (Categorical) + +```{r} +class.age <- function(age) { + if (is.na(age)) return(NA) + if (age < 2) { + return("0-1") + } else if (age < 6) { + return("2-5") + } else if (age < 11) { + return("6-10") + } else if (age < 19) { + return("11-18") + } +} + +dat.age.rec.group <- dat +dat.age.rec.group$age.rec.group = sapply(dat$age.rec, class.age) |> + factor(levels = c( + "0-1", + "2-5", + "6-10", + "11-18" + )) +cox.age.rec.group <- coxph(Surv(follow.up, death) ~ age.rec.group, + data = dat.age.rec.group) +``` + + +```{r, echo=TRUE} +cox.age.rec.group +``` + + + + +```{r, echo=TRUE} +eme.age.rec.group <- emmeans(cox.age.rec.group, pairwise ~ age.rec.group, + type = "response") +eme.age.rec.group +``` + + +```{r} +km.age.rec.group <- survfit(Surv(follow.up, death) ~ age.rec.group, + data = dat.age.rec.group) +``` + +```{r, fig.width=7, fig.height=7} +km.age.rec.group |> + ggsurvfit(type = "survival") + + add_confidence_interval() + + scale_color_discrete( + labels = c("0-1", "2-5", "6-10", "11-18") + ) + + scale_fill_discrete( + labels = c("0-1", "2-5", "6-10", "11-18") + ) + + labs( + title = "Survival by Recipient Age Group", + x = "Years of Follow-up", + y = "Survival Probability", + color = "Recipient Age Group", + fill = "Recipient Age Group", + ) + + my.theme +``` + +```{r, fig.width=7, fig.height=7} +eme.age.rec.group$contrast |> + as.data.frame() |> + select(contrast, ratio, SE, p.value) |> + mutate( + contrast = factor(contrast), + log.ratio = log(ratio), + SE.log = SE / ratio, + low = exp(log.ratio - 1.96 * SE.log), + up = exp(log.ratio + 1.96 * SE.log) + ) |> + ggplot(aes(y = reorder(contrast, ratio))) + + geom_errorbarh(aes(xmin = low, xmax = up)) + + geom_point(aes(x = ratio)) + + geom_vline(xintercept = 1, linetype = "dashed") + + scale_y_discrete( + labels = function(x) { + gsub("[()]", "", x = x) + } + ) + + labs( + x = "Hazard Ratio", + y = "Recipient Age Group Comparison", + title = "Pairwise Hazard Ratios by Recipient Age Groups" + ) + + my.theme +``` + +# Identifying Predictors + +TODO: write a more generic function + +```{r} +Waldtest <- function(cox.fit, i.betas){ + q <- length(i.betas) + coefs <- cox.fit$coefficients + var <- cox.fit$var[i.betas, i.betas] + Wald <- coefs[i.betas]%*%solve(var)%*%coefs[i.betas] + p.value <- 1-pchisq(as.numeric(Wald), df = q) + print(p.value) + return(p.value) +} +``` + +```{r} +vars <- c("hla.match", "age.donor", "age.rec", "cold.isc", "year", "sex", + "tx.type") +included <- vars == "tx.type" +included <- c(FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, TRUE) +included <- c(FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, TRUE) + +find.best.surv <- function(included) { + print('---') + test.vars <- vars[!included] + p.values <- sapply(test.vars, function(test.var) { + new.surv <- reformulate( + termlabels = c(vars[included], test.var), + response = "Surv(follow.up, death)" + ) + fit.tx <- coxph(new.surv, data = dat) + Waldtest(fit.tx, sum(included)) + }) + if (sum(p.values > 0.05) == sum(included)) { + print(3) + return(included) + } + if (sum(included == TRUE) == length(included)) { + print(2) + return(included) + } + print(1) + print(which.min(p.values)) + print(included) + print(p.values) + print(which.min(p.values)) + included[which.min(p.values)] <- TRUE + print(included) + # find.best.surv(included) +} + + +find.best.surv(included) + +``` + +```{r} +surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match +fit.cox.model(surv, 3) +surv <- Surv(follow.up, death) ~ tx.type + age.rec + age.donor +fit.cox.model(surv, 3) +surv <- Surv(follow.up, death) ~ tx.type + age.rec + cold.isc +fit.cox.model(surv, 3) +surv <- Surv(follow.up, death) ~ tx.type + age.rec + year +fit.cox.model(surv, 3) +surv <- Surv(follow.up, death) ~ tx.type + age.rec + sex +fit.cox.model(surv, 3) +``` + + +``` + +--- + + +# Old + +
+ +## Death & Censored + + +```{r, echo=TRUE} +dat |> + group_by(death) |> + summarise( + n = n(), + percentage = n() / nrow(dat) * 100 + ) +``` + +Most of the patients (95.24%) were censored. + +```{r} +n.pop <- nrow(dat) +dat.accum <- dat[dat$follow.up > 0,] |> + select(follow.up, death) |> + mutate(censored = ifelse(death == 1, 0, 1)) |> + group_by(follow.up) |> + summarize( + death = sum(death), + censored = sum(censored), + .groups = "drop" + ) |> + arrange(follow.up) |> + mutate( + death.accum = cumsum(death), + censored.accum = cumsum(censored), + event.accum = death.accum + censored.accum, + n.at.risk = n.pop - c(0, event.accum[-n()]) + ) +dat.accum |> + pivot_longer( + cols = c(death.accum, censored.accum, n.at.risk), + names_to = "type", + values_to = "accum", + ) |> + ggplot(aes(x = follow.up, y = accum, color = type)) + + geom_step(linewidth = 1, direction = "hv") + + my.theme +``` + +```{r} +dat[dat$death == 1,] |> + ggplot(aes(x = follow.up)) + + geom_histogram(bins = 50) +``` + + +```{r} +dat[dat$follow.up == 0,] +``` + +```{r} +head(dat) +``` + +## Cox Model - Age as a Continuous Variable + +```{r, echo=TRUE} +cox.age.rec <- coxph(Surv(follow.up, death) ~ age.rec, data = dat) +cox.age.rec |> summary() +``` + +## Cox Model - Age as a Categorical Variable + + +```{r, fig.width=10, fig.height=10} +cox.final <- coxph(Surv(follow.up, death) ~ tx.type + age.rec + hla.match + age.donor + year, data = dat) +cox.zph(cox.final) +plot(cox.zph(cox.final)) + +ggcoxzph(cox.zph(cox.final), + ggtheme = theme_minimal() + + theme( + legend.position = "bottom", + plot.title = element_text(size = 17, face = "bold"), + axis.title = element_text(size = 15), + axis.text = element_text(size = 12), + legend.text = element_text(size = 12), + strip.text = element_blank(), + plot.margin = margin(20, 30, 20, 30) + )) +``` + +## Table 1 (L) + + +### HLA match `hla.match` + +```{r} +dat$hla.match |> table(useNA = "always") +``` + +```{r} +ggplot(dat, aes(x = hla.match)) + + geom_bar() + + labs(x = "HLA match", y = "Count") + + theme_minimal() + +``` + +```{r} +ggplot(dat, aes(x = hla.match, fill = tx.type)) + + geom_bar(position = "dodge") + + labs(x = "HLA match", y = "Count") + + theme_minimal() +``` + +::: {.callout-warning} +TODO: change to percentage within each `tx.type` +::: + +::: {.callout-note} +Living donors have less `hla.match` then cadaveric donors +::: + + +### Donor age `age.donor` + +```{r} +#| echo: true +mean(dat$age.donor, na.rm = TRUE) +median(dat$age.donor, na.rm = TRUE) +``` + +```{r} +ggplot(dat, aes(x = age.donor)) + + geom_bar() + + labs(x = "Donor Age", y = "Count") + + theme_minimal() +``` + +```{r, fig.width = 8, fig.height = 12} +ggplot(dat, aes(x = age.donor)) + + geom_bar() + + labs(x = "Donor Age", y = "Count") + + theme_minimal() + + facet_grid(hla.match ~ .) +``` + +```{r, fig.width= 8, fig.height= 12} +ggplot(dat, aes(x = age.donor, color = tx.type)) + + geom_density() + + labs(x = "Donor Age", y = "Density") + + theme_minimal() + + facet_grid(hla.match ~ .) +``` + +```{r} +ggplot(dat, aes(x = age.donor, color = hla.match)) + + geom_density() + + labs(x = "Donor Age", y = "Count") + + theme_minimal() +``` + +```{r} +ggplot(dat, aes(x = as.factor(tx.type), y = age.donor)) + + geom_boxplot() + + theme_minimal() +``` + +::: {.callout-note} +There is age difference between different `tx.type` +::: + + +### Recipient Age `age.rec` + +```{r} +#| echo: true +mean(dat$age.rec, na.rm = TRUE) +median(dat$age.rec, na.rm = TRUE) +``` + + +```{r} +ggplot(dat, aes(x = age.rec)) + + geom_bar() + + labs(x = "Recipient Age", y = "Count") + + theme_minimal() + +``` + +```{r} +ggplot(dat, aes(x = as.factor(age.rec), y = age.donor)) + + geom_boxplot() + + theme_minimal() + +``` + +```{r} +ggplot(dat, aes(x = age.rec, fill = tx.type)) + + geom_bar(position = "dodge") + + theme_minimal() + +``` + +```{r} +ggplot(dat, aes(x = age.rec, color = hla.match)) + + geom_density() + + theme_minimal() +``` + +```{r} +ggplot(dat, aes(x = as.factor(age.rec), y = cold.isc)) + + geom_boxplot() + + theme_minimal() +``` + + +### Cold Ischemia Time `cold.isc` + +```{r} +#| echo: true +mean(dat$cold.isc, na.rm = TRUE) +median(dat$cold.isc, na.rm = TRUE) +``` + +```{r} +ggplot(dat, aes(x = cold.isc)) + + geom_density() + + labs(x = "Cold Ischemia Time (hours)", y = "Probability Density") + + theme_minimal() +``` + +```{r} +ggplot(dat, aes(x = cold.isc, color = tx.type, group = tx.type)) + + geom_density() + + labs(x = "Cold Ischemia Time (hours)", y = "Probability Density") + + theme_minimal() +``` + +```{r} +ggplot(dat, aes(x = cold.isc)) + + geom_density() + + theme_minimal() + + facet_grid(tx.type ~ .) +``` + +### Transplant Type `tx.type` + +```{r} +dat$tx.type |> table(useNA = "always") +``` + +```{r} +#| echo: true +dat$tx.type |> table() +``` + +```{r} +ggplot(dat, aes(x = tx.type)) + + geom_bar() + + labs(x = "Transplant Type", y = "Count") + + theme_minimal() + +``` + +### Year `year` + +```{r} +dat$year |> table() +``` + +```{r} +ggplot(dat, aes(x = year)) + + geom_bar() + + labs(x = "Year", y = "Count") + + theme_minimal() +``` + +```{r} +ggplot(dat, aes(x = year, y = follow.up)) + + geom_boxplot() +``` + +```{r} +ggplot(dat, aes(x = year, fill = tx.type)) + + geom_bar(position = "dodge") +``` + +```{r} +ggplot(dat, aes(x = year, y = age.rec)) + + geom_boxplot() +``` + +```{r} +ggplot(dat, aes(x = year, y = age.donor)) + + geom_boxplot() +``` + +## Kaplan-Meier + +### Overall (L) + +```{r} +km_all <- survfit(Surv(follow.up, death) ~ 1, data = dat) +summary(km_all, times = c(4, 8, 12)) +``` + +```{r} +km_all |> + ggsurvfit(type = "survival") + + add_confidence_interval() + + scale_y_continuous(limits = c(0, 1), labels = scales::label_percent()) + + labs( + x = "Years of Follow-up", + y = "Overall Survival Probability" + ) + + theme_minimal() +``` + +### `tx.type` (M) + +```{r, fig.width=7, fig.height=5} +km.tx <- survfit(Surv(follow.up, death) ~ tx.type, data = dat) + +km.tx |> + ggsurvfit(type = "survival") + + add_confidence_interval() + + scale_x_continuous( + breaks = seq(0, 20, by = 4) + ) + + scale_y_continuous( + limits = c(0, 1), + breaks = seq(0, 1, by = 0.2), + labels = scales::label_number(accuracy = 0.1) + ) + + labs( + x = "Years of Follow-up", + y = "Overall Survival Probability" + ) + + theme_minimal() + + theme( + legend.position = "bottom", + plot.title = element_text(size = 17, face = "bold"), + axis.title = element_text(size = 15), + axis.text = element_text(size = 10), + legend.text = element_text(size = 12), + plot.margin = margin(20, 30, 20, 30) + ) +``` + +## Cox Model + +```{r} +get.life.table <- function(dat, time.intervals) { + n.pop <- nrow(dat) + + dat |> + recode.dat(time.intervals) |> + group_by(fu.interval) |> + summarize( + n.censored = sum(.data$death == 0), + n.event = sum(.data$death), + ) |> + ungroup() |> + calculate.hazard(n.pop) +} + +get.life.table.by.groups <- function(dat, time.intervals, grps) { + grps |> + lapply(function(grp) { + dat |> + get.life.table.by.group(time.intervals, grp) |> + mutate( + grp.name = grp, + grp.value = pick(1)[[1]] + ) |> + select(-1) + }) |> + bind_rows() +} + +get.life.table.by.group <- function(dat, time.intervals, grp) { + dat |> + recode.dat(time.intervals) |> + group_by(fu.interval, .data[[grp]]) |> + summarize( + n.censored = sum(.data$death == 0), + n.event = sum(.data$death), + .groups = "keep" + ) |> + ungroup(fu.interval) |> + group_modify(function(df.sub, grp) { + grp.name <- names(grp) + grp.value <- grp[[1]] + n.pop <- (dat[[grp.name]] == grp.value) |> sum() + calculate.hazard(df.sub, n.pop) + }) |> + ungroup() +} + +calculate.hazard <- function(life.table, n.pop) { + n.removed <- life.table$n.event + life.table$n.censored + n.removed.accum <- c(0, cumsum(n.removed)[-length(n.removed)]) + life.table |> + mutate( + n.at.risk = n.pop - n.removed.accum, + # TODO: how to account for censored? How do we adjust for uneven interval? + hazard.rate = n.event / n.at.risk + ) +} + +recode.dat <- function(dat, time.intervals) { + df <- dat[dat$follow.up <= sum(time.intervals), ] + time.points <- cumsum(time.intervals) + df$fu.interval <- sapply(df$follow.up, function(time) { + time.points[time <= time.points][1] + }) + + df +} +``` + +### `death` Distribution (?) + +```{r} +# dat |> +# mutate( +# accum.death = cumsum(death), +# accum.censored = if_else(death == 1, 0, 1) |> cumsum() +# ) |> +# ggplot() + +# geom_pont(aes(x = follow.up, y = accum.death, color = death)) +``` + +```{r} + +``` + +```{r} +plot.death = dat[dat$death == 1,] |> + ggplot(aes(x = follow.up)) + + geom_histogram(bins = 50) +plot.death +``` + + +### Overall (L) + +::: {.callout-note} +skip overall, jump directly to `tx.type` +::: + +```{r} +time.intervals <- c(1/3, 1/3, 1/3, 1, 1, 1, 1) +get.life.table(dat, time.intervals) +``` + +### `tx.type` (M) + + +### `age` (continuous) (L) + +### `age` (categorical) (M) + +### Full model (L) + +```{r} +m1 <- coxph(Surv(follow.up, death) ~ hla.match + tx.type, data = dat) +summary(m1) + +m2 <- coxph(Surv(follow.up, death) ~ hla.match * tx.type, data = dat) +summary(m2) + +anova(m1, m2, test = "LRT") +``` + +### Discussion: include `year` or not? + +## Assumption Testing + + + ## Why Survival Analysis (L) + +Motivation: study the distribution of time to event $T$. + +Example: time of death after kidney transplant. + + +```{r} +ex <- data.frame( + id = c(1, 2, 3, 4, 5), + transplant = c(2000, 2000, 2001, 2003, 2004), + death = c(2005, 2009, 2005, 2004, 2016) +) + +ex_table_1 <- ex |> + gt() |> + cols_label( + id = "ID", + transplant = "Transplant", + death = "Death" + ) %>% + cols_align(align = "center", columns = everything()) + +ex_plot_real_time <- ggplot(ex) + + geom_segment(aes(x = transplant, xend = death, y = factor(id), + yend = factor(id)), color = "grey50", linewidth = 1) + + geom_point(aes(x = transplant, y = factor(id), color = "trans"), size = 3) + + geom_point(aes(x = death, y = factor(id), color = "death"), size = 3) + + scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D")) + + scale_y_discrete(limits = rev) + + labs(x = "Year", y = "Subject ID", color = "Event") + + theme_minimal() +``` + +```{r} +ex_table_1 +ex_plot_real_time +``` + +--- + +We then calculate time to death for each respondents. + +With this, all we need is to fit a linear model `t ~ X` or `log(t) ~ X` + +```{r} +ex_t <- ex |> + mutate( + t = death - transplant + ) + +ex_table_2 <- ex_t |> + gt() |> + cols_label( + id = "ID", + transplant = "Transplant", + death = "Death", + t = "Time to death" + ) |> + cols_align(align = "center", columns = everything()) + +ex_plot_uniform_time <- ggplot(ex_t) + + geom_segment(aes(x = 0, xend = t, y = factor(id), + yend = factor(id)), color = "grey50", linewidth = 1) + + geom_point(aes(x = 0, y = factor(id), color = "trans"), size = 3) + + geom_point(aes(x = t, y = factor(id), color = "death"), size = 3) + + scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D")) + + scale_y_discrete(limits = rev) + + labs(x = "Year", y = "Subject ID", color = "Event") + + theme_minimal() + +``` + + +```{r} +ex_table_2 +ex_plot_uniform_time +``` + +--- + +What if we do not know when exactly some respondents die? + +Scenario 1: the study ends at the year 2008? + +```{r} +ex_t_c <- ex_t |> + mutate( + death_censored = if_else(death <= 2008, death, 2008), + death_censored_txt = if_else(death <= 2008, as.character(death), "> 2008"), + status = if_else(death <= 2008, 1, 0), + t_1 = death_censored - transplant, + t_1_txt = if_else(death <= 2008, as.character(t_1), + paste(">", as.character(t_1))) + ) +``` + +```{r} +ex_table_3 <- ex_t_c |> + select(id, transplant, death_censored_txt, t_1_txt) |> + gt() |> + cols_label( + id = "ID", + transplant = "Transplant", + death_censored_txt = "Death", + t_1_txt = "Time to death" + ) |> + cols_align(align = "center", columns = everything()) + +ex_plot_real_time_1 <- ggplot(ex_t_c) + + geom_segment(aes(x = transplant, xend = death_censored, y = factor(id), + yend = factor(id)), color = "grey50", linewidth = 1) + + geom_point(aes(x = transplant, y = factor(id), color = "trans"), size = 3) + + geom_point(aes(x = death_censored, y = factor(id), color = + if_else(status == 1, "death", "censored")), size = 3) + + scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D", + "censored" = "orange")) + + scale_y_discrete(limits = rev) + + labs(x = "Year", y = "Subject ID", color = "Event") + + xlim(2000, 2016) + + geom_vline(xintercept = 2008, linetype = "dashed", color = "orange", + linewidth = 0.8) + + theme_minimal() + + +ex_plot_uniform_time_1 <- ggplot(ex_t_c) + + geom_segment(aes(x = 0, xend = t_1, y = factor(id), + yend = factor(id)), color = "grey50", linewidth = 1) + + geom_point(aes(x = 0, y = factor(id), color = "trans"), size = 3) + + geom_point(aes(x = t_1, y = factor(id), color = + if_else(status == 1, "death", "censored")), size = 3) + + scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D", + "censored" = "orange")) + + scale_y_discrete(limits = rev) + + labs(x = "Year", y = "Subject ID", color = "Event") + + theme_minimal() +``` + + +```{r} +ex_table_3 +ex_plot_real_time_1 +ex_plot_uniform_time_1 +``` + +**Right Censoring**: only observe the event (death) if it occurs before a +certain time (2008). + +--- + +Scenario 2: respondent 3 move away; loss follow up + +```{r} +ex_alt <- ex_t_c +ex_alt$death_censored_txt[3] <- "> 2005" +ex_alt$status[3] <- 0 +ex_alt$t_1_txt[3] <- "> 4" + + +ex_table_4 <- ex_alt |> + select(id, transplant, death_censored_txt, t_1_txt) |> + gt() |> + cols_label( + id = "ID", + transplant = "Transplant", + death_censored_txt = "Death", + t_1_txt = "Time to death" + ) |> + cols_align(align = "center", columns = everything()) + +ex_plot_real_time_2 <- ggplot(ex_alt) + + geom_segment(aes(x = transplant, xend = death_censored, y = factor(id), + yend = factor(id)), color = "grey50", linewidth = 1) + + geom_point(aes(x = transplant, y = factor(id), color = "trans"), size = 3) + + geom_point(aes(x = death_censored, y = factor(id), color = + if_else(status == 1, "death", "censored")), size = 3) + + scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D", + "censored" = "orange")) + + scale_y_discrete(limits = rev) + + labs(x = "Year", y = "Subject ID", color = "Event") + + xlim(2000, 2016) + + geom_vline(xintercept = 2008, linetype = "dashed", color = "orange", + linewidth = 0.8) + + theme_minimal() + + +ex_plot_uniform_time_2 <- ggplot(ex_alt) + + geom_segment(aes(x = 0, xend = t_1, y = factor(id), + yend = factor(id)), color = "grey50", linewidth = 1) + + geom_point(aes(x = 0, y = factor(id), color = "trans"), size = 3) + + geom_point(aes(x = t_1, y = factor(id), color = + if_else(status == 1, "death", "censored")), size = 3) + + scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D", + "censored" = "orange")) + + scale_y_discrete(limits = rev) + + labs(x = "Year", y = "Subject ID", color = "Event") + + theme_minimal() +``` + +```{r} +ex_table_4 +ex_plot_real_time_2 +ex_plot_uniform_time_2 +``` + +--- + +How many patients are right censored? + +```{r} +dat |> + mutate(Overall = "Overall") |> + pivot_longer( + cols = c(Overall, sex, tx.type), + names_to = "Attribute", + values_to = "Category" + ) |> + count(Attribute, Category, death) |> + ggplot(aes(x = Category, y = n, fill = factor(death))) + + geom_col(position = "dodge") + + facet_wrap(~ Attribute, scales = "free_x") + + labs(x = "Group", y = "Count", fill = "Death Status") + + theme_minimal() +``` + +## Challenges in Survival Analysis (L) + +- **Right Censoring**: only observe the event if it occurs before a certain + time. +- **Left Censoring**: event has occurred prior to the start of a research + - Follow up every 3 years. + - Event has occurred -> event happened sometime before follow up. +- **Left Truncation**: delayed entry; respondents are included only if they + survived long enough. + - Start follow up with patients 100 days after the transplant + - Patients dies within 100 day wouldn't be included in the dataset +- **Right Truncation**: respondents are included only if they have already + experienced the event. + - Retrospective analysis from deceased patients between 1990 and 2000. + - Patients not dead before 2000 are not included in the dataset. +
+ diff --git a/slides.html b/slides.html index 80b495b..260d904 100644 --- a/slides.html +++ b/slides.html @@ -2214,6 +2214,48 @@ window.Quarto = {

2 Method & Result

2.1 Explain Dataset (M)

+
+
+
+ +
+
+Things to highlight in figures +
+
+
+
    +
  • sex and recipient age are similar across tx.type

  • +
  • Living donors have less hla.match then cadaveric donors

  • +
  • There is age difference between different tx.type

  • +
+
+
+
+

2.1.1 sex ~ tx.tpye

+
+
+
+ +
+
+Note +
+
+
+

sex are similar across tx.type

+
+
+
+
+
+
+

+
+
+
+
+

2.2 Table 1 (L)

@@ -2230,7 +2272,7 @@ window.Quarto = {
-

+

@@ -2239,11 +2281,37 @@ window.Quarto = {
-

+

+
+
+
+ +
+
+Warning +
+
+
+

TODO: change to percentage within each tx.type

+
+
+
+
+
+ +
+
+Note +
+
+
+

Living donors have less hla.match then cadaveric donors

+
+

2.2.2 Donor age age.donor

@@ -2265,7 +2333,7 @@ window.Quarto = {
-

+

@@ -2278,7 +2346,7 @@ window.Quarto = {
-

+

@@ -2291,28 +2359,15 @@ window.Quarto = {
-

+

-
-
-

2.2.3 Recipient Age age.rec

-
-
mean(dat$age.rec, na.rm = TRUE)
-
-
[1] 11.64653
-
-
median(dat$age.rec, na.rm = TRUE)
-
-
[1] 13
-
-
-
Warning: Removed 9 rows containing non-finite outside the scale range
-(`stat_count()`).
+
Warning: Removed 113 rows containing non-finite outside the scale range
+(`stat_density()`).
@@ -2335,23 +2390,36 @@ window.Quarto = {
+
+
+
+ +
+
+Note +
+
+
+

There is age difference between different tx.type

+
+
+
+
+

2.2.3 Recipient Age age.rec

-
-
Warning: Removed 9 rows containing non-finite outside the scale range
-(`stat_count()`).
-
-
-
-
-

-
+
mean(dat$age.rec, na.rm = TRUE)
+
+
[1] 11.64653
+
median(dat$age.rec, na.rm = TRUE)
+
+
[1] 13
Warning: Removed 9 rows containing non-finite outside the scale range
-(`stat_density()`).
+(`stat_count()`).
@@ -2363,7 +2431,7 @@ window.Quarto = {
-
Warning: Removed 2250 rows containing non-finite outside the scale range
+
Warning: Removed 113 rows containing non-finite outside the scale range
 (`stat_boxplot()`).
@@ -2374,22 +2442,22 @@ window.Quarto = {
-
-
-

2.2.4 Cold Ischemia Time cold.isc

-
mean(dat$cold.isc, na.rm = TRUE)
-
-
[1] 10.85967
+
+
Warning: Removed 9 rows containing non-finite outside the scale range
+(`stat_count()`).
+
+
+
+
+

+
-
median(dat$cold.isc, na.rm = TRUE)
-
-
[1] 7
-
Warning: Removed 2250 rows containing non-finite outside the scale range
+
Warning: Removed 9 rows containing non-finite outside the scale range
 (`stat_density()`).
@@ -2403,7 +2471,7 @@ window.Quarto = {
Warning: Removed 2250 rows containing non-finite outside the scale range
-(`stat_density()`).
+(`stat_boxplot()`).
@@ -2414,10 +2482,69 @@ window.Quarto = {
+
+

2.2.4 Cold Ischemia Time cold.isc

+
+
mean(dat$cold.isc, na.rm = TRUE)
+
+
[1] 10.85967
+
+
median(dat$cold.isc, na.rm = TRUE)
+
+
[1] 7
+
+
+
+
+
Warning: Removed 2250 rows containing non-finite outside the scale range
+(`stat_density()`).
+
+
+
+
+

+
+
+
+
+
+
+
Warning: Removed 2250 rows containing non-finite outside the scale range
+(`stat_density()`).
+
+
+
+
+

+
+
+
+
+
+
+
Warning: Removed 2250 rows containing non-finite outside the scale range
+(`stat_density()`).
+
+
+
+
+

+
+
+
+
+

2.2.5 Transplant Type tx.type

-
dat$tx.type |> table()
+
+

+Cadaveric    Living      <NA> 
+     5148      4627         0 
+
+
+
+
dat$tx.type |> table()

 Cadaveric    Living 
@@ -2428,7 +2555,7 @@ Cadaveric    Living
 
-

+

@@ -2447,7 +2574,7 @@ Cadaveric Living
-

+

@@ -2456,7 +2583,7 @@ Cadaveric Living
-

+

@@ -2465,7 +2592,7 @@ Cadaveric Living
-

+

@@ -2478,7 +2605,7 @@ Cadaveric Living
-

+

@@ -2491,50 +2618,79 @@ Cadaveric Living
-

+

-
-

2.3 Overall Kaplan-Meier

+
+

2.3 Kaplan-Meier

+
+

2.3.1 Overall (L)

+
+
+
Call: survfit(formula = Surv(follow.up, death) ~ 1, data = dat)
+
+ time n.risk n.event survival std.err lower 95% CI upper 95% CI
+    4   4150     359    0.953 0.00252        0.948        0.958
+    8   1197      86    0.919 0.00449        0.910        0.928
+   12     48      20    0.888 0.00867        0.871        0.905
+
+
-

+

-
-

2.4 Hazard Functions

+
+

2.3.2 tx.type (M)

+
+
+
+
+

+
+
+
+
+
+
+
+

2.4 Cox Model

2.4.1 death Distribution (?)

-

-
-
-
-
-
-
-
-
-

+

-
-

2.4.2 Overall (?)

+
+

2.4.2 Overall (L)

+
+
+
+ +
+
+Note +
+
+
+

skip overall, jump directly to tx.type

+
+
# A tibble: 7 × 5
@@ -2550,20 +2706,17 @@ Cadaveric    Living
 
-
-

2.4.3 tx.type (M)

+
+

2.4.3 tx.type (M)

+
+

2.4.4 age (continuous) (L)

-
-

2.5 Cox Model

-
-

2.5.1 age (continuous) (L)

+
+

2.4.5 age (categorical) (M)

-
-

2.5.2 age (categorical) (M)

-
-
-

2.5.3 Full model (L)

+
+

2.4.6 Full model (L)

Call:
@@ -2654,12 +2807,12 @@ Score (logrank) test = 69.92  on 13 df,   p=8e-10
-
-

2.5.4 Discussion: include year or not?

+
+

2.4.7 Discussion: include year or not?

-
-

2.6 Assumption Testing

+
+

2.5 Assumption Testing

diff --git a/slides.qmd b/slides.qmd deleted file mode 100644 index 81b33bb..0000000 --- a/slides.qmd +++ /dev/null @@ -1,609 +0,0 @@ ---- -title: Slides -execute: - cache: true - freeze: auto - include: true - echo: false -number-sections: true ---- - -```{r} -library(tidyverse) -library(dplyr) -library(ggplot2) -library(survival) -library(emmeans) -library(foreign) -library(gtsummary) -library(gt) -library(ggsurvfit) -``` - -```{r} -dat <- read.csv("./unos.txt", sep = "\t") -names(dat) <- c("hla.match", "age.donor", "age.rec", "cold.isc", "death", - "year", "sex", "tx.type", "follow.up") -dat <- dat |> - mutate( - sex = factor(sex, levels = c(0,1), labels = c("Female","Male")), - tx.type = factor(tx.type, levels = c(0,1), labels = c("Cadaveric","Living")), - hla.match = factor(hla.match), - year = factor(year) - ) -``` - -# Introduction: research question - -## Survival after transplantation (M) - -## Identify Predictors (M) - -## Why Survival Analysis (L) - -Motivation: study the distribution of time to event $T$. - -Example: time of death after kidney transplant. - - -```{r} -ex <- data.frame( - id = c(1, 2, 3, 4, 5), - transplant = c(2000, 2000, 2001, 2003, 2004), - death = c(2005, 2009, 2005, 2004, 2016) -) - -ex_table_1 <- ex |> - gt() |> - cols_label( - id = "ID", - transplant = "Transplant", - death = "Death" - ) %>% - cols_align(align = "center", columns = everything()) - -ex_plot_real_time <- ggplot(ex) + - geom_segment(aes(x = transplant, xend = death, y = factor(id), - yend = factor(id)), color = "grey50", linewidth = 1) + - geom_point(aes(x = transplant, y = factor(id), color = "trans"), size = 3) + - geom_point(aes(x = death, y = factor(id), color = "death"), size = 3) + - scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D")) + - scale_y_discrete(limits = rev) + - labs(x = "Year", y = "Subject ID", color = "Event") + - theme_minimal() -``` - -```{r} -ex_table_1 -ex_plot_real_time -``` - ---- - -We then calculate time to death for each respondents. - -With this, all we need is to fit a linear model `t ~ X` or `log(t) ~ X` - -```{r} -ex_t <- ex |> - mutate( - t = death - transplant - ) - -ex_table_2 <- ex_t |> - gt() |> - cols_label( - id = "ID", - transplant = "Transplant", - death = "Death", - t = "Time to death" - ) |> - cols_align(align = "center", columns = everything()) - -ex_plot_uniform_time <- ggplot(ex_t) + - geom_segment(aes(x = 0, xend = t, y = factor(id), - yend = factor(id)), color = "grey50", linewidth = 1) + - geom_point(aes(x = 0, y = factor(id), color = "trans"), size = 3) + - geom_point(aes(x = t, y = factor(id), color = "death"), size = 3) + - scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D")) + - scale_y_discrete(limits = rev) + - labs(x = "Year", y = "Subject ID", color = "Event") + - theme_minimal() - -``` - - -```{r} -ex_table_2 -ex_plot_uniform_time -``` - ---- - -What if we do not know when exactly some respondents die? - -Scenario 1: the study ends at the year 2008? - -```{r} -ex_t_c <- ex_t |> - mutate( - death_censored = if_else(death <= 2008, death, 2008), - death_censored_txt = if_else(death <= 2008, as.character(death), "> 2008"), - status = if_else(death <= 2008, 1, 0), - t_1 = death_censored - transplant, - t_1_txt = if_else(death <= 2008, as.character(t_1), - paste(">", as.character(t_1))) - ) -``` - -```{r} -ex_table_3 <- ex_t_c |> - select(id, transplant, death_censored_txt, t_1_txt) |> - gt() |> - cols_label( - id = "ID", - transplant = "Transplant", - death_censored_txt = "Death", - t_1_txt = "Time to death" - ) |> - cols_align(align = "center", columns = everything()) - -ex_plot_real_time_1 <- ggplot(ex_t_c) + - geom_segment(aes(x = transplant, xend = death_censored, y = factor(id), - yend = factor(id)), color = "grey50", linewidth = 1) + - geom_point(aes(x = transplant, y = factor(id), color = "trans"), size = 3) + - geom_point(aes(x = death_censored, y = factor(id), color = - if_else(status == 1, "death", "censored")), size = 3) + - scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D", - "censored" = "orange")) + - scale_y_discrete(limits = rev) + - labs(x = "Year", y = "Subject ID", color = "Event") + - xlim(2000, 2016) + - geom_vline(xintercept = 2008, linetype = "dashed", color = "orange", - linewidth = 0.8) + - theme_minimal() - - -ex_plot_uniform_time_1 <- ggplot(ex_t_c) + - geom_segment(aes(x = 0, xend = t_1, y = factor(id), - yend = factor(id)), color = "grey50", linewidth = 1) + - geom_point(aes(x = 0, y = factor(id), color = "trans"), size = 3) + - geom_point(aes(x = t_1, y = factor(id), color = - if_else(status == 1, "death", "censored")), size = 3) + - scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D", - "censored" = "orange")) + - scale_y_discrete(limits = rev) + - labs(x = "Year", y = "Subject ID", color = "Event") + - theme_minimal() -``` - - -```{r} -ex_table_3 -ex_plot_real_time_1 -ex_plot_uniform_time_1 -``` - -**Right Censoring**: only observe the event (death) if it occurs before a -certain time (2008). - ---- - -Scenario 2: respondent 3 move away; loss follow up - -```{r} -ex_alt <- ex_t_c -ex_alt$death_censored_txt[3] <- "> 2005" -ex_alt$status[3] <- 0 -ex_alt$t_1_txt[3] <- "> 4" - - -ex_table_4 <- ex_alt |> - select(id, transplant, death_censored_txt, t_1_txt) |> - gt() |> - cols_label( - id = "ID", - transplant = "Transplant", - death_censored_txt = "Death", - t_1_txt = "Time to death" - ) |> - cols_align(align = "center", columns = everything()) - -ex_plot_real_time_2 <- ggplot(ex_alt) + - geom_segment(aes(x = transplant, xend = death_censored, y = factor(id), - yend = factor(id)), color = "grey50", linewidth = 1) + - geom_point(aes(x = transplant, y = factor(id), color = "trans"), size = 3) + - geom_point(aes(x = death_censored, y = factor(id), color = - if_else(status == 1, "death", "censored")), size = 3) + - scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D", - "censored" = "orange")) + - scale_y_discrete(limits = rev) + - labs(x = "Year", y = "Subject ID", color = "Event") + - xlim(2000, 2016) + - geom_vline(xintercept = 2008, linetype = "dashed", color = "orange", - linewidth = 0.8) + - theme_minimal() - - -ex_plot_uniform_time_2 <- ggplot(ex_alt) + - geom_segment(aes(x = 0, xend = t_1, y = factor(id), - yend = factor(id)), color = "grey50", linewidth = 1) + - geom_point(aes(x = 0, y = factor(id), color = "trans"), size = 3) + - geom_point(aes(x = t_1, y = factor(id), color = - if_else(status == 1, "death", "censored")), size = 3) + - scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D", - "censored" = "orange")) + - scale_y_discrete(limits = rev) + - labs(x = "Year", y = "Subject ID", color = "Event") + - theme_minimal() -``` - -```{r} -ex_table_4 -ex_plot_real_time_2 -ex_plot_uniform_time_2 -``` - ---- - -How many patients are right censored? - -```{r} -dat |> - mutate(Overall = "Overall") |> - pivot_longer( - cols = c(Overall, sex, tx.type), - names_to = "Attribute", - values_to = "Category" - ) |> - count(Attribute, Category, death) |> - ggplot(aes(x = Category, y = n, fill = factor(death))) + - geom_col(position = "dodge") + - facet_wrap(~ Attribute, scales = "free_x") + - labs(x = "Group", y = "Count", fill = "Death Status") + - theme_minimal() -``` - -## Challenges in Survival Analysis (L) - -- **Right Censoring**: only observe the event if it occurs before a certain - time. -- **Left Censoring**: event has occurred prior to the start of a research - - Follow up every 3 years. - - Event has occurred -> event happened sometime before follow up. -- **Left Truncation**: delayed entry; respondents are included only if they - survived long enough. - - Start follow up with patients 100 days after the transplant - - Patients dies within 100 day wouldn't be included in the dataset -- **Right Truncation**: respondents are included only if they have already - experienced the event. - - Retrospective analysis from deceased patients between 1990 and 2000. - - Patients not dead before 2000 are not included in the dataset. - -# Method & Result - -## Explain Dataset (M) - -## Table 1 (L) - - -### HLA match `hla.match` - -```{r} -dat$hla.match |> table(useNA = "always") -``` - -```{r} -ggplot(dat, aes(x = hla.match)) + - geom_bar() + - labs(x = "HLA match", y = "Count") + - theme_minimal() - -``` - -```{r} -ggplot(dat, aes(x = hla.match, fill = tx.type)) + - geom_bar(position = "dodge") + - labs(x = "HLA match", y = "Count") + - theme_minimal() -``` - -### Donor age `age.donor` - -```{r} -#| echo: true -mean(dat$age.donor, na.rm = TRUE) -median(dat$age.donor, na.rm = TRUE) -``` - -```{r} -ggplot(dat, aes(x = age.donor)) + - geom_bar() + - labs(x = "Donor Age", y = "Count") + - theme_minimal() -``` - -```{r} -#| fig-width: 8 -#| fig-height: 12 -ggplot(dat, aes(x = age.donor)) + - geom_bar() + - labs(x = "Donor Age", y = "Count") + - theme_minimal() + - facet_grid(hla.match ~ .) -``` - -```{r} -ggplot(dat, aes(x = age.donor, color = hla.match)) + - geom_density() + - labs(x = "Donor Age", y = "Count") + - theme_minimal() -``` - - -### Recipient Age `age.rec` - -```{r} -#| echo: true -mean(dat$age.rec, na.rm = TRUE) -median(dat$age.rec, na.rm = TRUE) -``` - - -```{r} -ggplot(dat, aes(x = age.rec)) + - geom_bar() + - labs(x = "Recipient Age", y = "Count") + - theme_minimal() - -``` - -```{r} -ggplot(dat, aes(x = as.factor(age.rec), y = age.donor)) + - geom_boxplot() + - theme_minimal() - -``` - -```{r} -ggplot(dat, aes(x = age.rec, fill = tx.type)) + - geom_bar(position = "dodge") + - theme_minimal() - -``` - -```{r} -ggplot(dat, aes(x = age.rec, color = hla.match)) + - geom_density() + - theme_minimal() -``` - -```{r} -ggplot(dat, aes(x = as.factor(age.rec), y = cold.isc)) + - geom_boxplot() + - theme_minimal() -``` - - -### Cold Ischemia Time `cold.isc` - -```{r} -#| echo: true -mean(dat$cold.isc, na.rm = TRUE) -median(dat$cold.isc, na.rm = TRUE) -``` - -```{r} -ggplot(dat, aes(x = cold.isc)) + - geom_density() + - labs(x = "Cold Ischemia Time (hours)", y = "Probability Density") + - theme_minimal() -``` - -```{r} -ggplot(dat, aes(x = cold.isc, color = tx.type, group = tx.type)) + - geom_density() + - labs(x = "Cold Ischemia Time (hours)", y = "Probability Density") + - theme_minimal() -``` - -### Transplant Type `tx.type` - -```{r} -#| echo: true -dat$tx.type |> table() -``` - -```{r} -ggplot(dat, aes(x = tx.type)) + - geom_bar() + - labs(x = "Transplant Type", y = "Count") + - theme_minimal() - -``` - -### Year `year` - -```{r} -dat$year |> table() -``` - -```{r} -ggplot(dat, aes(x = year)) + - geom_bar() + - labs(x = "Year", y = "Count") + - theme_minimal() -``` - -```{r} -ggplot(dat, aes(x = year, y = follow.up)) + - geom_boxplot() -``` - -```{r} -ggplot(dat, aes(x = year, fill = tx.type)) + - geom_bar(position = "dodge") -``` - -```{r} -ggplot(dat, aes(x = year, y = age.rec)) + - geom_boxplot() -``` - -```{r} -ggplot(dat, aes(x = year, y = age.donor)) + - geom_boxplot() -``` - -## Overall Kaplan-Meier - -```{r} -km_all <- survfit(Surv(follow.up, death) ~ 1, data = dat) -``` - -```{r} -km_all |> - ggsurvfit(type = "survival") + - add_confidence_interval() + - scale_y_continuous(limits = c(0, 1), labels = scales::label_percent()) + - labs( - x = "Years of Follow-up", - y = "Overall Survival Probability" - ) + - theme_minimal() -``` - -## Hazard Functions - -```{r} -get.life.table <- function(dat, time.intervals) { - n.pop <- nrow(dat) - - dat |> - recode.dat(time.intervals) |> - group_by(fu.interval) |> - summarize( - n.censored = sum(.data$death == 0), - n.event = sum(.data$death), - ) |> - ungroup() |> - calculate.hazard(n.pop) -} - -get.life.table.by.groups <- function(dat, time.intervals, grps) { - grps |> - lapply(function(grp) { - dat |> - get.life.table.by.group(time.intervals, grp) |> - mutate( - grp.name = grp, - grp.value = pick(1)[[1]] - ) |> - select(-1) - }) |> - bind_rows() -} - -get.life.table.by.group <- function(dat, time.intervals, grp) { - dat |> - recode.dat(time.intervals) |> - group_by(fu.interval, .data[[grp]]) |> - summarize( - n.censored = sum(.data$death == 0), - n.event = sum(.data$death), - .groups = "keep" - ) |> - ungroup(fu.interval) |> - group_modify(function(df.sub, grp) { - grp.name <- names(grp) - grp.value <- grp[[1]] - n.pop <- (dat[[grp.name]] == grp.value) |> sum() - calculate.hazard(df.sub, n.pop) - }) |> - ungroup() -} - -calculate.hazard <- function(life.table, n.pop) { - n.removed <- life.table$n.event + life.table$n.censored - n.removed.accum <- c(0, cumsum(n.removed)[-length(n.removed)]) - life.table |> - mutate( - n.at.risk = n.pop - n.removed.accum, - # TODO: how to account for censored? How do we adjust for uneven interval? - hazard.rate = n.event / n.at.risk - ) -} - -recode.dat <- function(dat, time.intervals) { - df <- dat[dat$follow.up <= sum(time.intervals), ] - time.points <- cumsum(time.intervals) - df$fu.interval <- sapply(df$follow.up, function(time) { - time.points[time <= time.points][1] - }) - - df -} -``` - -### `death` Distribution (?) - -```{r} -dat |> - mutate( - accum.death = cumsum(death), - accum.censored = if_else(death == 1, 0, 1) |> cumsum() - ) |> - ggplot() + - geom_step(aes(x = follow.up, y = accum.death)) -``` - -```{r} - -``` - -```{r} -plot.death = dat[dat$death == 1,] |> - ggplot(aes(x = follow.up)) + - geom_histogram(bins = 50) -plot.death -``` - - -### Overall (?) - -```{r} -time.intervals <- c(1/3, 1/3, 1/3, 1, 1, 1, 1) -get.life.table(dat, time.intervals) -``` - -### `tx.type` (M) - - -```{r} - -``` - - -## Cox Model - -### `age` (continuous) (L) - -### `age` (categorical) (M) - -### Full model (L) - -```{r} -m1 <- coxph(Surv(follow.up, death) ~ hla.match + tx.type, data = dat) -summary(m1) - -m2 <- coxph(Surv(follow.up, death) ~ hla.match * tx.type, data = dat) -summary(m2) - -anova(m1, m2, test = "LRT") -``` - -### Discussion: include `year` or not? - -## Assumption Testing - -# Conclusion (M + L)