diff --git a/publish.sh b/publish.sh index 04c97b0..f319fd5 100644 --- a/publish.sh +++ b/publish.sh @@ -19,6 +19,6 @@ git add -A git commit -m "Deploy static site" # Push this folder as the gh-pages branch of chmee/kidney -git push -f git@github.com:chmlee/kidney.git master:gh-pages +git push -f git@github.com:chmee/kidney.git main:gh-pages cd - diff --git a/report.Rmd b/report.Rmd index f51fe1b..89e97dc 100644 --- a/report.Rmd +++ b/report.Rmd @@ -14,7 +14,6 @@ library(survival) library(emmeans) library(foreign) library(gtsummary) -library(ggsurvfit) ``` ```{r} @@ -115,29 +114,6 @@ table.split <- dat.table1 |> table.split ``` -```{r} -table.split <- dat.table1 |> - select(sex, age.rec, age.donor, hla.match, cold.isc, tx.type) |> - tbl_summary( - by = tx.type, - statistic = list( - all_continuous() ~ "{median} ({p25}, {p75})" - ), - label = list( - hla.match ~ "HLA matches, n(%)", - age.donor ~ "Donor age, median (IQR)", - age.rec ~ "Recipient age, median (IQR)", - cold.isc ~ "Cold ischemic time (hours), median (IQR), ", - sex ~ "Sex, n(%)" - ), - missing = "ifany" - ) |> - add_overall() |> - modify_footnote(all_stat_cols() ~ NA) - -table.split -``` - Calculate median follow-up (reverse Kaplan-Meier method), with 95% CI ```{r} @@ -391,6 +367,9 @@ recode.dat <- function(dat, time.intervals) { df } + + + ``` ```{r} @@ -546,25 +525,29 @@ recode.age <- function(dat, ages) { dat.age.rec <- dat[!is.na(dat$age.rec), ] class.age <- function(age) { if (age < 2) { - return("0-1") + return("0,1") } else if (age < 6) { - return("2-5") + return("2,3,4,5") } else if (age < 11) { - return("6-10") + return("6,7,8,9,10") } else if (age < 19) { return("11-18") } } +``` + +```{r} dat.age.rec$age.group <- sapply(dat.age.rec$age.rec, class.age) |> factor(levels = c( - "0-1", - "2-5", - "6-10", + "0,1", + "2,3,4,5", + "6,7,8,9,10", "11-18" )) cox.age.rec <- coxph(Surv(follow.up, death) ~ age.group, data = dat.age.rec) +cox.age.rec |> summary() ``` ```{r} @@ -572,7 +555,6 @@ emmeans(cox.age.rec, pairwise ~ age.group, type = "response") ``` - # Exercise 7 Fit a multivariate Cox model by using other predictors and @@ -699,7 +681,6 @@ H1: PH-assumption doesn't hold # different cox model!! cox.final <- coxph(Surv(follow.up, death) ~ tx.type + age.rec + hla.match + age.donor, data = dat) cox.zph(cox.final) -cox.final ``` @@ -710,11 +691,6 @@ A non-random pattern or slope in a plot of scaled residuals against time indicat plot(cox.zph(cox.final)) ``` -```{r, fig.width=10, fig.height=10} -plot(cox.zph(cox.final)) -``` - - NOTES: diff --git a/slides.Rmd b/slides.Rmd deleted file mode 100644 index 024b4b4..0000000 --- a/slides.Rmd +++ /dev/null @@ -1,1233 +0,0 @@ ---- -title: Slides -execute: - cache: true - freeze: auto - include: true - echo: false - warning: false -number-sections: true ---- - -```{r} -library(tidyverse) -library(dplyr) -library(ggplot2) -library(survival) -library(emmeans) -library(foreign) -library(gtsummary) -library(gt) -library(ggsurvfit) -library(gridExtra) -library(survminer) -library(viridis) -``` - - -```{r} -dat <- read.csv("./unos.txt", sep = "\t") -names(dat) <- c("hla.match", "age.donor", "age.rec", "cold.isc", "death", - "year", "sex", "tx.type", "follow.up") -dat <- dat |> - mutate( - sex = factor(sex, levels = c(0,1), labels = c("Female","Male")), - tx.type = factor(tx.type, levels = c(0,1), labels = c("Cadaveric","Living")), - hla.match = factor(hla.match), - year = factor(year) - ) -``` - -```{r} -my.theme <- theme_minimal() + - theme( - legend.position = "bottom", - plot.title = element_text(size = 17, face = "bold"), - axis.title = element_text(size = 15), - axis.text = element_text(size = 12), - legend.text = element_text(size = 12), - plot.margin = margin(20, 30, 20, 30) - ) -``` - -::: {.callout-warning} -There are 25 repondents with `follow.up = 0`, 21 of which with `death = 0` and -other 4 with `death = 1`. - -```{r, echo=TRUE} -dat[dat$follow.up == 0, ] |> - group_by(death) |> - summarise(n = n()) -``` - -Reasons to omit them, that is, have `dat <- dat[dat$follow.up > 0,]` at the -very beginning: - -- If `death = 1`, transplant failed. They shouldn't be included in the research - question "survival rate of a *successful* tranplant". -- If `death = 0`, transplant was successful, but they were immediately removed - from populatoin at risk, contributiong nothing to the analysis. -::: - -# Title - -# Table of Contents (?) - -# Introduction (M) - -# Method & Result - -## Identify Predictors (M) - -- Data source -- Study population -```{r} -nrow(dat) -``` -- Covariates -- Outcome -```{r} -names(dat) -``` - -## Table (L) - -```{r} -dat |> - select(sex, age.rec, age.donor, hla.match, cold.isc, tx.type) |> - tbl_summary( - by = tx.type, - statistic = list( - all_continuous() ~ "{median} ({p25}, {p75})" - ), - label = list( - hla.match ~ "HLA matches, n(%)", - age.donor ~ "Donor age, median (IQR)", - age.rec ~ "Recipient age, median (IQR)", - cold.isc ~ "Cold ischemic time (hours), median (IQR), ", - sex ~ "Sex, n(%)" - ), - missing = "ifany" - ) |> - add_overall() |> - modify_footnote(all_stat_cols() ~ NA) -``` - -# Elementary Analysis: Transplant Type (L) - -## Transplant Type vs Sex - -`sex` are similar across `tx.type` - -```{r, fig.width=7, fig.height=5} -plot.tx.sex <- dat |> - mutate(Overall = "Overall") |> - pivot_longer( - cols = c(tx.type, Overall), - names_to = "Attribute", - values_to = "Category" - ) |> - mutate( - Attribute = recode( - Attribute, - "tx.type" = "Transplant Type" - ) - ) |> - count(Attribute, Category, sex) |> - group_by(Attribute, Category) |> - mutate( - total = sum(n), - percent = n / total - ) |> - ungroup() |> - ggplot(aes(x = Category, y = percent, fill = sex)) + - geom_col(position = "dodge") + - facet_grid( - ~ Attribute, - scales = "free_x", - space = "free_x" - ) + - labs( - title = "Percentage of Sex by Transplant Type", - x = "Group", - y = "Percentage", - fill = NULL - ) + - scale_x_discrete( - labels = c("Cadaveric" = "Deceased") - ) + - my.theme -plot.tx.sex -``` - -## Transplant Type vs Recipient Age - -```{r, fig.width=7, fig.height=5} -plot.tx.rec <- ggplot(data = dat) + - geom_density(aes(x = age.rec, color = tx.type)) + - geom_density( - aes(x = age.rec, color = "Overall"), - linewidth = 0.5, - linetype = "dashed" - ) + - scale_color_manual( - values = c( - "Cadaveric" = "darkorange", - "Living" = "darkgreen", - "Overall" = "gray50" - ), - labels = c( - "Cadaveric" = "Deceased" - ) - ) + - labs( - title = "Density of Recipient Age by Transplant Type", - x = "Recipient Age", - y = "Density", - color = NULL - ) + - my.theme -plot.tx.rec -``` - - -# Elementary Analysis: Transplant Type (Cont.) - -## Transplant Type vs HLA Match - -Living donors have less `hla.match` then cadaveric donors. - -```{r, fig.width=7, fig.height=5} -ggplot(data = dat) + - geom_bar(aes(x = hla.match)) + - facet_wrap(tx.type ~ ., ncol = 1) + - labs( - title = "Distribution of HLA Match by Transplant Type", - x = "HLA match", - y = "Count", - color = NULL - ) + - my.theme - -``` - -## Transplant Type vs Donor Age - -```{r, fig.width=7, fig.height=5} -plot.tx.donor <- ggplot(data = dat) + - geom_density(aes(x = age.donor, color = tx.type)) + - geom_density( - aes(x = age.donor, color = "Overall"), - linewidth = 0.5, - linetype = "dashed" - ) + - scale_color_manual( - values = c( - "Cadaveric" = "darkorange", - "Living" = "darkgreen", - "Overall" = "gray50" - ), - labels = c( - "Cadaveric" = "Deceased" - ) - ) + - labs( - title = "Density of Donor Age by Transplant Type", - x = "Recipient Age", - y = "Density", - color = NULL - ) + - my.theme - -plot.tx.donor -``` - -# Overall Kaplan-Meier Curve - -```{r} -km_all <- survfit(Surv(follow.up, death) ~ 1, data = dat) -``` - -```{r, fig.width=7, fig.height=5} -km_all |> - ggsurvfit(type = "survival") + - add_confidence_interval() + - scale_y_continuous(limits = c(0, 1)) + - labs( - title = "Kaplan-Meier Curve for Overall Survival", - x = "Years of Follow-up", - y = "Survival Probability", - ) + - my.theme -``` - -```{r} -summary(km_all, times = c(0, 4, 8, 12)) -``` - -# Cox Model (with intervals?) - -# Cox Model: Transplant Type - -```{r} -cox.tx <- coxph(Surv(follow.up, death) ~ tx.type, data = dat) -summary(cox.tx) -``` - -```{r} -km.tx <- survfit(Surv(follow.up, death) ~ tx.type, data = dat) -``` - -```{r, fig.width=7, fig.height=5} -km.tx |> - ggsurvfit(type = "survival") + - add_confidence_interval() + - scale_color_discrete( - labels = c("Deceased", "Living") - ) + - scale_fill_discrete( - labels = c("Deceased", "Living") - ) + - labs( - title = "Survival by Donor Type", - x = "Years of Follow-up", - y = "Survival Probability", - color = "Donor Type", - fill = "Donor Type" - ) + - my.theme -``` - -```{r} -summary(km.tx, times = c(0, 4, 8, 12)) -``` - -# Cox Model: Age Recipient (Continuous) - -```{r} -cox.age.rec <- coxph(Surv(follow.up, death) ~ age.rec, data = dat) -summary(cox.age.rec) -``` - -```{r} -km.age.rec <- survfit(Surv(follow.up, death) ~ age.rec, data = dat) -``` - -```{r, fig.width=7, fig.height=7} -km.age.rec |> - ggsurvfit(type = "survival") + - scale_color_discrete( - labels = seq(1, 18) - ) + - labs( - title = "Survival by Recipient Age", - x = "Years of Follow-up", - y = "Survival Probability", - color = "Recipient Age", - ) + - my.theme -``` - -
-View life table - -```{r} -summary(km.age.rec, times = c(0, 4, 8, 12)) -``` -
- - -# Cox Model: Age Recipient (Categorical) - -```{r} -class.age <- function(age) { - if (is.na(age)) return(NA) - if (age < 2) { - return("0-1") - } else if (age < 6) { - return("2-5") - } else if (age < 11) { - return("6-10") - } else if (age < 19) { - return("11-18") - } -} - -dat.age.rec.group <- dat -dat.age.rec.group$age.rec.group = sapply(dat$age.rec, class.age) |> - factor(levels = c( - "0-1", - "2-5", - "6-10", - "11-18" - )) -cox.age.rec.group <- coxph(Surv(follow.up, death) ~ age.rec.group, - data = dat.age.rec.group) -``` - - -```{r, echo=TRUE} -cox.age.rec.group -``` - - - - -```{r, echo=TRUE} -eme.age.rec.group <- emmeans(cox.age.rec.group, pairwise ~ age.rec.group, - type = "response") -eme.age.rec.group -``` - - -```{r} -km.age.rec.group <- survfit(Surv(follow.up, death) ~ age.rec.group, - data = dat.age.rec.group) -``` - -```{r, fig.width=7, fig.height=7} -km.age.rec.group |> - ggsurvfit(type = "survival") + - add_confidence_interval() + - scale_color_discrete( - labels = c("0-1", "2-5", "6-10", "11-18") - ) + - scale_fill_discrete( - labels = c("0-1", "2-5", "6-10", "11-18") - ) + - labs( - title = "Survival by Recipient Age Group", - x = "Years of Follow-up", - y = "Survival Probability", - color = "Recipient Age Group", - fill = "Recipient Age Group", - ) + - my.theme -``` - -```{r, fig.width=7, fig.height=7} -eme.age.rec.group$contrast |> - as.data.frame() |> - select(contrast, ratio, SE, p.value) |> - mutate( - contrast = factor(contrast), - log.ratio = log(ratio), - SE.log = SE / ratio, - low = exp(log.ratio - 1.96 * SE.log), - up = exp(log.ratio + 1.96 * SE.log) - ) |> - ggplot(aes(y = reorder(contrast, ratio))) + - geom_errorbarh(aes(xmin = low, xmax = up)) + - geom_point(aes(x = ratio)) + - geom_vline(xintercept = 1, linetype = "dashed") + - scale_y_discrete( - labels = function(x) { - gsub("[()]", "", x = x) - } - ) + - labs( - x = "Hazard Ratio", - y = "Recipient Age Group Comparison", - title = "Pairwise Hazard Ratios by Recipient Age Groups" - ) + - my.theme -``` - -# Identifying Predictors - -TODO: write a more generic function - -```{r} -Waldtest <- function(cox.fit, i.betas){ - q <- length(i.betas) - coefs <- cox.fit$coefficients - var <- cox.fit$var[i.betas, i.betas] - Wald <- coefs[i.betas]%*%solve(var)%*%coefs[i.betas] - p.value <- 1-pchisq(as.numeric(Wald), df = q) - print(p.value) - return(p.value) -} -``` - -```{r} -vars <- c("hla.match", "age.donor", "age.rec", "cold.isc", "year", "sex", - "tx.type") -included <- vars == "tx.type" -included <- c(FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, TRUE) -included <- c(FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, TRUE) - -find.best.surv <- function(included) { - print('---') - test.vars <- vars[!included] - p.values <- sapply(test.vars, function(test.var) { - new.surv <- reformulate( - termlabels = c(vars[included], test.var), - response = "Surv(follow.up, death)" - ) - fit.tx <- coxph(new.surv, data = dat) - Waldtest(fit.tx, sum(included)) - }) - if (sum(p.values > 0.05) == sum(included)) { - print(3) - return(included) - } - if (sum(included == TRUE) == length(included)) { - print(2) - return(included) - } - print(1) - print(which.min(p.values)) - print(included) - print(p.values) - print(which.min(p.values)) - included[which.min(p.values)] <- TRUE - print(included) - # find.best.surv(included) -} - - -find.best.surv(included) - -``` - -```{r} -surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match -fit.cox.model(surv, 3) -surv <- Surv(follow.up, death) ~ tx.type + age.rec + age.donor -fit.cox.model(surv, 3) -surv <- Surv(follow.up, death) ~ tx.type + age.rec + cold.isc -fit.cox.model(surv, 3) -surv <- Surv(follow.up, death) ~ tx.type + age.rec + year -fit.cox.model(surv, 3) -surv <- Surv(follow.up, death) ~ tx.type + age.rec + sex -fit.cox.model(surv, 3) -``` - - -``` - ---- - - -# Old - -
- -## Death & Censored - - -```{r, echo=TRUE} -dat |> - group_by(death) |> - summarise( - n = n(), - percentage = n() / nrow(dat) * 100 - ) -``` - -Most of the patients (95.24%) were censored. - -```{r} -n.pop <- nrow(dat) -dat.accum <- dat[dat$follow.up > 0,] |> - select(follow.up, death) |> - mutate(censored = ifelse(death == 1, 0, 1)) |> - group_by(follow.up) |> - summarize( - death = sum(death), - censored = sum(censored), - .groups = "drop" - ) |> - arrange(follow.up) |> - mutate( - death.accum = cumsum(death), - censored.accum = cumsum(censored), - event.accum = death.accum + censored.accum, - n.at.risk = n.pop - c(0, event.accum[-n()]) - ) -dat.accum |> - pivot_longer( - cols = c(death.accum, censored.accum, n.at.risk), - names_to = "type", - values_to = "accum", - ) |> - ggplot(aes(x = follow.up, y = accum, color = type)) + - geom_step(linewidth = 1, direction = "hv") + - my.theme -``` - -```{r} -dat[dat$death == 1,] |> - ggplot(aes(x = follow.up)) + - geom_histogram(bins = 50) -``` - - -```{r} -dat[dat$follow.up == 0,] -``` - -```{r} -head(dat) -``` - -## Cox Model - Age as a Continuous Variable - -```{r, echo=TRUE} -cox.age.rec <- coxph(Surv(follow.up, death) ~ age.rec, data = dat) -cox.age.rec |> summary() -``` - -## Cox Model - Age as a Categorical Variable - - -```{r, fig.width=10, fig.height=10} -cox.final <- coxph(Surv(follow.up, death) ~ tx.type + age.rec + hla.match + age.donor + year, data = dat) -cox.zph(cox.final) -plot(cox.zph(cox.final)) - -ggcoxzph(cox.zph(cox.final), - ggtheme = theme_minimal() + - theme( - legend.position = "bottom", - plot.title = element_text(size = 17, face = "bold"), - axis.title = element_text(size = 15), - axis.text = element_text(size = 12), - legend.text = element_text(size = 12), - strip.text = element_blank(), - plot.margin = margin(20, 30, 20, 30) - )) -``` - -## Table 1 (L) - - -### HLA match `hla.match` - -```{r} -dat$hla.match |> table(useNA = "always") -``` - -```{r} -ggplot(dat, aes(x = hla.match)) + - geom_bar() + - labs(x = "HLA match", y = "Count") + - theme_minimal() - -``` - -```{r} -ggplot(dat, aes(x = hla.match, fill = tx.type)) + - geom_bar(position = "dodge") + - labs(x = "HLA match", y = "Count") + - theme_minimal() -``` - -::: {.callout-warning} -TODO: change to percentage within each `tx.type` -::: - -::: {.callout-note} -Living donors have less `hla.match` then cadaveric donors -::: - - -### Donor age `age.donor` - -```{r} -#| echo: true -mean(dat$age.donor, na.rm = TRUE) -median(dat$age.donor, na.rm = TRUE) -``` - -```{r} -ggplot(dat, aes(x = age.donor)) + - geom_bar() + - labs(x = "Donor Age", y = "Count") + - theme_minimal() -``` - -```{r, fig.width = 8, fig.height = 12} -ggplot(dat, aes(x = age.donor)) + - geom_bar() + - labs(x = "Donor Age", y = "Count") + - theme_minimal() + - facet_grid(hla.match ~ .) -``` - -```{r, fig.width= 8, fig.height= 12} -ggplot(dat, aes(x = age.donor, color = tx.type)) + - geom_density() + - labs(x = "Donor Age", y = "Density") + - theme_minimal() + - facet_grid(hla.match ~ .) -``` - -```{r} -ggplot(dat, aes(x = age.donor, color = hla.match)) + - geom_density() + - labs(x = "Donor Age", y = "Count") + - theme_minimal() -``` - -```{r} -ggplot(dat, aes(x = as.factor(tx.type), y = age.donor)) + - geom_boxplot() + - theme_minimal() -``` - -::: {.callout-note} -There is age difference between different `tx.type` -::: - - -### Recipient Age `age.rec` - -```{r} -#| echo: true -mean(dat$age.rec, na.rm = TRUE) -median(dat$age.rec, na.rm = TRUE) -``` - - -```{r} -ggplot(dat, aes(x = age.rec)) + - geom_bar() + - labs(x = "Recipient Age", y = "Count") + - theme_minimal() - -``` - -```{r} -ggplot(dat, aes(x = as.factor(age.rec), y = age.donor)) + - geom_boxplot() + - theme_minimal() - -``` - -```{r} -ggplot(dat, aes(x = age.rec, fill = tx.type)) + - geom_bar(position = "dodge") + - theme_minimal() - -``` - -```{r} -ggplot(dat, aes(x = age.rec, color = hla.match)) + - geom_density() + - theme_minimal() -``` - -```{r} -ggplot(dat, aes(x = as.factor(age.rec), y = cold.isc)) + - geom_boxplot() + - theme_minimal() -``` - - -### Cold Ischemia Time `cold.isc` - -```{r} -#| echo: true -mean(dat$cold.isc, na.rm = TRUE) -median(dat$cold.isc, na.rm = TRUE) -``` - -```{r} -ggplot(dat, aes(x = cold.isc)) + - geom_density() + - labs(x = "Cold Ischemia Time (hours)", y = "Probability Density") + - theme_minimal() -``` - -```{r} -ggplot(dat, aes(x = cold.isc, color = tx.type, group = tx.type)) + - geom_density() + - labs(x = "Cold Ischemia Time (hours)", y = "Probability Density") + - theme_minimal() -``` - -```{r} -ggplot(dat, aes(x = cold.isc)) + - geom_density() + - theme_minimal() + - facet_grid(tx.type ~ .) -``` - -### Transplant Type `tx.type` - -```{r} -dat$tx.type |> table(useNA = "always") -``` - -```{r} -#| echo: true -dat$tx.type |> table() -``` - -```{r} -ggplot(dat, aes(x = tx.type)) + - geom_bar() + - labs(x = "Transplant Type", y = "Count") + - theme_minimal() - -``` - -### Year `year` - -```{r} -dat$year |> table() -``` - -```{r} -ggplot(dat, aes(x = year)) + - geom_bar() + - labs(x = "Year", y = "Count") + - theme_minimal() -``` - -```{r} -ggplot(dat, aes(x = year, y = follow.up)) + - geom_boxplot() -``` - -```{r} -ggplot(dat, aes(x = year, fill = tx.type)) + - geom_bar(position = "dodge") -``` - -```{r} -ggplot(dat, aes(x = year, y = age.rec)) + - geom_boxplot() -``` - -```{r} -ggplot(dat, aes(x = year, y = age.donor)) + - geom_boxplot() -``` - -## Kaplan-Meier - -### Overall (L) - -```{r} -km_all <- survfit(Surv(follow.up, death) ~ 1, data = dat) -summary(km_all, times = c(4, 8, 12)) -``` - -```{r} -km_all |> - ggsurvfit(type = "survival") + - add_confidence_interval() + - scale_y_continuous(limits = c(0, 1), labels = scales::label_percent()) + - labs( - x = "Years of Follow-up", - y = "Overall Survival Probability" - ) + - theme_minimal() -``` - -### `tx.type` (M) - -```{r, fig.width=7, fig.height=5} -km.tx <- survfit(Surv(follow.up, death) ~ tx.type, data = dat) - -km.tx |> - ggsurvfit(type = "survival") + - add_confidence_interval() + - scale_x_continuous( - breaks = seq(0, 20, by = 4) - ) + - scale_y_continuous( - limits = c(0, 1), - breaks = seq(0, 1, by = 0.2), - labels = scales::label_number(accuracy = 0.1) - ) + - labs( - x = "Years of Follow-up", - y = "Overall Survival Probability" - ) + - theme_minimal() + - theme( - legend.position = "bottom", - plot.title = element_text(size = 17, face = "bold"), - axis.title = element_text(size = 15), - axis.text = element_text(size = 10), - legend.text = element_text(size = 12), - plot.margin = margin(20, 30, 20, 30) - ) -``` - -## Cox Model - -```{r} -get.life.table <- function(dat, time.intervals) { - n.pop <- nrow(dat) - - dat |> - recode.dat(time.intervals) |> - group_by(fu.interval) |> - summarize( - n.censored = sum(.data$death == 0), - n.event = sum(.data$death), - ) |> - ungroup() |> - calculate.hazard(n.pop) -} - -get.life.table.by.groups <- function(dat, time.intervals, grps) { - grps |> - lapply(function(grp) { - dat |> - get.life.table.by.group(time.intervals, grp) |> - mutate( - grp.name = grp, - grp.value = pick(1)[[1]] - ) |> - select(-1) - }) |> - bind_rows() -} - -get.life.table.by.group <- function(dat, time.intervals, grp) { - dat |> - recode.dat(time.intervals) |> - group_by(fu.interval, .data[[grp]]) |> - summarize( - n.censored = sum(.data$death == 0), - n.event = sum(.data$death), - .groups = "keep" - ) |> - ungroup(fu.interval) |> - group_modify(function(df.sub, grp) { - grp.name <- names(grp) - grp.value <- grp[[1]] - n.pop <- (dat[[grp.name]] == grp.value) |> sum() - calculate.hazard(df.sub, n.pop) - }) |> - ungroup() -} - -calculate.hazard <- function(life.table, n.pop) { - n.removed <- life.table$n.event + life.table$n.censored - n.removed.accum <- c(0, cumsum(n.removed)[-length(n.removed)]) - life.table |> - mutate( - n.at.risk = n.pop - n.removed.accum, - # TODO: how to account for censored? How do we adjust for uneven interval? - hazard.rate = n.event / n.at.risk - ) -} - -recode.dat <- function(dat, time.intervals) { - df <- dat[dat$follow.up <= sum(time.intervals), ] - time.points <- cumsum(time.intervals) - df$fu.interval <- sapply(df$follow.up, function(time) { - time.points[time <= time.points][1] - }) - - df -} -``` - -### `death` Distribution (?) - -```{r} -# dat |> -# mutate( -# accum.death = cumsum(death), -# accum.censored = if_else(death == 1, 0, 1) |> cumsum() -# ) |> -# ggplot() + -# geom_pont(aes(x = follow.up, y = accum.death, color = death)) -``` - -```{r} - -``` - -```{r} -plot.death = dat[dat$death == 1,] |> - ggplot(aes(x = follow.up)) + - geom_histogram(bins = 50) -plot.death -``` - - -### Overall (L) - -::: {.callout-note} -skip overall, jump directly to `tx.type` -::: - -```{r} -time.intervals <- c(1/3, 1/3, 1/3, 1, 1, 1, 1) -get.life.table(dat, time.intervals) -``` - -### `tx.type` (M) - - -### `age` (continuous) (L) - -### `age` (categorical) (M) - -### Full model (L) - -```{r} -m1 <- coxph(Surv(follow.up, death) ~ hla.match + tx.type, data = dat) -summary(m1) - -m2 <- coxph(Surv(follow.up, death) ~ hla.match * tx.type, data = dat) -summary(m2) - -anova(m1, m2, test = "LRT") -``` - -### Discussion: include `year` or not? - -## Assumption Testing - - - ## Why Survival Analysis (L) - -Motivation: study the distribution of time to event $T$. - -Example: time of death after kidney transplant. - - -```{r} -ex <- data.frame( - id = c(1, 2, 3, 4, 5), - transplant = c(2000, 2000, 2001, 2003, 2004), - death = c(2005, 2009, 2005, 2004, 2016) -) - -ex_table_1 <- ex |> - gt() |> - cols_label( - id = "ID", - transplant = "Transplant", - death = "Death" - ) %>% - cols_align(align = "center", columns = everything()) - -ex_plot_real_time <- ggplot(ex) + - geom_segment(aes(x = transplant, xend = death, y = factor(id), - yend = factor(id)), color = "grey50", linewidth = 1) + - geom_point(aes(x = transplant, y = factor(id), color = "trans"), size = 3) + - geom_point(aes(x = death, y = factor(id), color = "death"), size = 3) + - scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D")) + - scale_y_discrete(limits = rev) + - labs(x = "Year", y = "Subject ID", color = "Event") + - theme_minimal() -``` - -```{r} -ex_table_1 -ex_plot_real_time -``` - ---- - -We then calculate time to death for each respondents. - -With this, all we need is to fit a linear model `t ~ X` or `log(t) ~ X` - -```{r} -ex_t <- ex |> - mutate( - t = death - transplant - ) - -ex_table_2 <- ex_t |> - gt() |> - cols_label( - id = "ID", - transplant = "Transplant", - death = "Death", - t = "Time to death" - ) |> - cols_align(align = "center", columns = everything()) - -ex_plot_uniform_time <- ggplot(ex_t) + - geom_segment(aes(x = 0, xend = t, y = factor(id), - yend = factor(id)), color = "grey50", linewidth = 1) + - geom_point(aes(x = 0, y = factor(id), color = "trans"), size = 3) + - geom_point(aes(x = t, y = factor(id), color = "death"), size = 3) + - scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D")) + - scale_y_discrete(limits = rev) + - labs(x = "Year", y = "Subject ID", color = "Event") + - theme_minimal() - -``` - - -```{r} -ex_table_2 -ex_plot_uniform_time -``` - ---- - -What if we do not know when exactly some respondents die? - -Scenario 1: the study ends at the year 2008? - -```{r} -ex_t_c <- ex_t |> - mutate( - death_censored = if_else(death <= 2008, death, 2008), - death_censored_txt = if_else(death <= 2008, as.character(death), "> 2008"), - status = if_else(death <= 2008, 1, 0), - t_1 = death_censored - transplant, - t_1_txt = if_else(death <= 2008, as.character(t_1), - paste(">", as.character(t_1))) - ) -``` - -```{r} -ex_table_3 <- ex_t_c |> - select(id, transplant, death_censored_txt, t_1_txt) |> - gt() |> - cols_label( - id = "ID", - transplant = "Transplant", - death_censored_txt = "Death", - t_1_txt = "Time to death" - ) |> - cols_align(align = "center", columns = everything()) - -ex_plot_real_time_1 <- ggplot(ex_t_c) + - geom_segment(aes(x = transplant, xend = death_censored, y = factor(id), - yend = factor(id)), color = "grey50", linewidth = 1) + - geom_point(aes(x = transplant, y = factor(id), color = "trans"), size = 3) + - geom_point(aes(x = death_censored, y = factor(id), color = - if_else(status == 1, "death", "censored")), size = 3) + - scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D", - "censored" = "orange")) + - scale_y_discrete(limits = rev) + - labs(x = "Year", y = "Subject ID", color = "Event") + - xlim(2000, 2016) + - geom_vline(xintercept = 2008, linetype = "dashed", color = "orange", - linewidth = 0.8) + - theme_minimal() - - -ex_plot_uniform_time_1 <- ggplot(ex_t_c) + - geom_segment(aes(x = 0, xend = t_1, y = factor(id), - yend = factor(id)), color = "grey50", linewidth = 1) + - geom_point(aes(x = 0, y = factor(id), color = "trans"), size = 3) + - geom_point(aes(x = t_1, y = factor(id), color = - if_else(status == 1, "death", "censored")), size = 3) + - scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D", - "censored" = "orange")) + - scale_y_discrete(limits = rev) + - labs(x = "Year", y = "Subject ID", color = "Event") + - theme_minimal() -``` - - -```{r} -ex_table_3 -ex_plot_real_time_1 -ex_plot_uniform_time_1 -``` - -**Right Censoring**: only observe the event (death) if it occurs before a -certain time (2008). - ---- - -Scenario 2: respondent 3 move away; loss follow up - -```{r} -ex_alt <- ex_t_c -ex_alt$death_censored_txt[3] <- "> 2005" -ex_alt$status[3] <- 0 -ex_alt$t_1_txt[3] <- "> 4" - - -ex_table_4 <- ex_alt |> - select(id, transplant, death_censored_txt, t_1_txt) |> - gt() |> - cols_label( - id = "ID", - transplant = "Transplant", - death_censored_txt = "Death", - t_1_txt = "Time to death" - ) |> - cols_align(align = "center", columns = everything()) - -ex_plot_real_time_2 <- ggplot(ex_alt) + - geom_segment(aes(x = transplant, xend = death_censored, y = factor(id), - yend = factor(id)), color = "grey50", linewidth = 1) + - geom_point(aes(x = transplant, y = factor(id), color = "trans"), size = 3) + - geom_point(aes(x = death_censored, y = factor(id), color = - if_else(status == 1, "death", "censored")), size = 3) + - scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D", - "censored" = "orange")) + - scale_y_discrete(limits = rev) + - labs(x = "Year", y = "Subject ID", color = "Event") + - xlim(2000, 2016) + - geom_vline(xintercept = 2008, linetype = "dashed", color = "orange", - linewidth = 0.8) + - theme_minimal() - - -ex_plot_uniform_time_2 <- ggplot(ex_alt) + - geom_segment(aes(x = 0, xend = t_1, y = factor(id), - yend = factor(id)), color = "grey50", linewidth = 1) + - geom_point(aes(x = 0, y = factor(id), color = "trans"), size = 3) + - geom_point(aes(x = t_1, y = factor(id), color = - if_else(status == 1, "death", "censored")), size = 3) + - scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D", - "censored" = "orange")) + - scale_y_discrete(limits = rev) + - labs(x = "Year", y = "Subject ID", color = "Event") + - theme_minimal() -``` - -```{r} -ex_table_4 -ex_plot_real_time_2 -ex_plot_uniform_time_2 -``` - ---- - -How many patients are right censored? - -```{r} -dat |> - mutate(Overall = "Overall") |> - pivot_longer( - cols = c(Overall, sex, tx.type), - names_to = "Attribute", - values_to = "Category" - ) |> - count(Attribute, Category, death) |> - ggplot(aes(x = Category, y = n, fill = factor(death))) + - geom_col(position = "dodge") + - facet_wrap(~ Attribute, scales = "free_x") + - labs(x = "Group", y = "Count", fill = "Death Status") + - theme_minimal() -``` - -## Challenges in Survival Analysis (L) - -- **Right Censoring**: only observe the event if it occurs before a certain - time. -- **Left Censoring**: event has occurred prior to the start of a research - - Follow up every 3 years. - - Event has occurred -> event happened sometime before follow up. -- **Left Truncation**: delayed entry; respondents are included only if they - survived long enough. - - Start follow up with patients 100 days after the transplant - - Patients dies within 100 day wouldn't be included in the dataset -- **Right Truncation**: respondents are included only if they have already - experienced the event. - - Retrospective analysis from deceased patients between 1990 and 2000. - - Patients not dead before 2000 are not included in the dataset. -
- diff --git a/slides.html b/slides.html index 260d904..80b495b 100644 --- a/slides.html +++ b/slides.html @@ -2214,48 +2214,6 @@ window.Quarto = {

2 Method & Result

2.1 Explain Dataset (M)

-
-
-
- -
-
-Things to highlight in figures -
-
-
-
    -
  • sex and recipient age are similar across tx.type

  • -
  • Living donors have less hla.match then cadaveric donors

  • -
  • There is age difference between different tx.type

  • -
-
-
-
-

2.1.1 sex ~ tx.tpye

-
-
-
- -
-
-Note -
-
-
-

sex are similar across tx.type

-
-
-
-
-
-
-

-
-
-
-
-

2.2 Table 1 (L)

@@ -2272,7 +2230,7 @@ Note
-

+

@@ -2281,37 +2239,11 @@ Note
-

+

-
-
-
- -
-
-Warning -
-
-
-

TODO: change to percentage within each tx.type

-
-
-
-
-
- -
-
-Note -
-
-
-

Living donors have less hla.match then cadaveric donors

-
-

2.2.2 Donor age age.donor

@@ -2333,7 +2265,7 @@ Note
-

+

@@ -2346,7 +2278,7 @@ Note
-

+

@@ -2359,15 +2291,28 @@ Note
-

+

+
+
+

2.2.3 Recipient Age age.rec

+
+
mean(dat$age.rec, na.rm = TRUE)
+
+
[1] 11.64653
+
+
median(dat$age.rec, na.rm = TRUE)
+
+
[1] 13
+
+
-
Warning: Removed 113 rows containing non-finite outside the scale range
-(`stat_density()`).
+
Warning: Removed 9 rows containing non-finite outside the scale range
+(`stat_count()`).
@@ -2390,36 +2335,23 @@ Note
-
-
-
- -
-
-Note -
-
-
-

There is age difference between different tx.type

-
-
-
-
-

2.2.3 Recipient Age age.rec

-
mean(dat$age.rec, na.rm = TRUE)
-
-
[1] 11.64653
+
+
Warning: Removed 9 rows containing non-finite outside the scale range
+(`stat_count()`).
+
+
+
+
+

+
-
median(dat$age.rec, na.rm = TRUE)
-
-
[1] 13
Warning: Removed 9 rows containing non-finite outside the scale range
-(`stat_count()`).
+(`stat_density()`).
@@ -2431,7 +2363,7 @@ Note
-
Warning: Removed 113 rows containing non-finite outside the scale range
+
Warning: Removed 2250 rows containing non-finite outside the scale range
 (`stat_boxplot()`).
@@ -2442,22 +2374,22 @@ Note
+
+
+

2.2.4 Cold Ischemia Time cold.isc

-
-
Warning: Removed 9 rows containing non-finite outside the scale range
-(`stat_count()`).
-
-
-
-
-

-
+
mean(dat$cold.isc, na.rm = TRUE)
+
+
[1] 10.85967
+
median(dat$cold.isc, na.rm = TRUE)
+
+
[1] 7
-
Warning: Removed 9 rows containing non-finite outside the scale range
+
Warning: Removed 2250 rows containing non-finite outside the scale range
 (`stat_density()`).
@@ -2471,7 +2403,7 @@ Note
Warning: Removed 2250 rows containing non-finite outside the scale range
-(`stat_boxplot()`).
+(`stat_density()`).
@@ -2482,69 +2414,10 @@ Note
-
-

2.2.4 Cold Ischemia Time cold.isc

-
-
mean(dat$cold.isc, na.rm = TRUE)
-
-
[1] 10.85967
-
-
median(dat$cold.isc, na.rm = TRUE)
-
-
[1] 7
-
-
-
-
-
Warning: Removed 2250 rows containing non-finite outside the scale range
-(`stat_density()`).
-
-
-
-
-

-
-
-
-
-
-
-
Warning: Removed 2250 rows containing non-finite outside the scale range
-(`stat_density()`).
-
-
-
-
-

-
-
-
-
-
-
-
Warning: Removed 2250 rows containing non-finite outside the scale range
-(`stat_density()`).
-
-
-
-
-

-
-
-
-
-

2.2.5 Transplant Type tx.type

-
-

-Cadaveric    Living      <NA> 
-     5148      4627         0 
-
-
-
-
dat$tx.type |> table()
+
dat$tx.type |> table()

 Cadaveric    Living 
@@ -2555,7 +2428,7 @@ Cadaveric    Living
 
-

+

@@ -2574,7 +2447,7 @@ Cadaveric Living
-

+

@@ -2583,7 +2456,7 @@ Cadaveric Living
-

+

@@ -2592,7 +2465,7 @@ Cadaveric Living
-

+

@@ -2605,7 +2478,7 @@ Cadaveric Living
-

+

@@ -2618,79 +2491,50 @@ Cadaveric Living
-

+

-
-

2.3 Kaplan-Meier

-
-

2.3.1 Overall (L)

-
-
-
Call: survfit(formula = Surv(follow.up, death) ~ 1, data = dat)
-
- time n.risk n.event survival std.err lower 95% CI upper 95% CI
-    4   4150     359    0.953 0.00252        0.948        0.958
-    8   1197      86    0.919 0.00449        0.910        0.928
-   12     48      20    0.888 0.00867        0.871        0.905
-
-
+
+

2.3 Overall Kaplan-Meier

-

+

-
-

2.3.2 tx.type (M)

-
-
-
-
-

-
-
-
-
-
-
-
-

2.4 Cox Model

+
+

2.4 Hazard Functions

2.4.1 death Distribution (?)

-

+

+
+
+
+
+
+
+
+
+

-
-

2.4.2 Overall (L)

-
-
-
- -
-
-Note -
-
-
-

skip overall, jump directly to tx.type

-
-
+
+

2.4.2 Overall (?)

# A tibble: 7 × 5
@@ -2706,17 +2550,20 @@ Note
 
-
-

2.4.3 tx.type (M)

+
+

2.4.3 tx.type (M)

-
-

2.4.4 age (continuous) (L)

-
-

2.4.5 age (categorical) (M)

+
+

2.5 Cox Model

+
+

2.5.1 age (continuous) (L)

-
-

2.4.6 Full model (L)

+
+

2.5.2 age (categorical) (M)

+
+
+

2.5.3 Full model (L)

Call:
@@ -2807,12 +2654,12 @@ Score (logrank) test = 69.92  on 13 df,   p=8e-10
-
-

2.4.7 Discussion: include year or not?

+
+

2.5.4 Discussion: include year or not?

-
-

2.5 Assumption Testing

+
+

2.6 Assumption Testing

diff --git a/slides.qmd b/slides.qmd new file mode 100644 index 0000000..81b33bb --- /dev/null +++ b/slides.qmd @@ -0,0 +1,609 @@ +--- +title: Slides +execute: + cache: true + freeze: auto + include: true + echo: false +number-sections: true +--- + +```{r} +library(tidyverse) +library(dplyr) +library(ggplot2) +library(survival) +library(emmeans) +library(foreign) +library(gtsummary) +library(gt) +library(ggsurvfit) +``` + +```{r} +dat <- read.csv("./unos.txt", sep = "\t") +names(dat) <- c("hla.match", "age.donor", "age.rec", "cold.isc", "death", + "year", "sex", "tx.type", "follow.up") +dat <- dat |> + mutate( + sex = factor(sex, levels = c(0,1), labels = c("Female","Male")), + tx.type = factor(tx.type, levels = c(0,1), labels = c("Cadaveric","Living")), + hla.match = factor(hla.match), + year = factor(year) + ) +``` + +# Introduction: research question + +## Survival after transplantation (M) + +## Identify Predictors (M) + +## Why Survival Analysis (L) + +Motivation: study the distribution of time to event $T$. + +Example: time of death after kidney transplant. + + +```{r} +ex <- data.frame( + id = c(1, 2, 3, 4, 5), + transplant = c(2000, 2000, 2001, 2003, 2004), + death = c(2005, 2009, 2005, 2004, 2016) +) + +ex_table_1 <- ex |> + gt() |> + cols_label( + id = "ID", + transplant = "Transplant", + death = "Death" + ) %>% + cols_align(align = "center", columns = everything()) + +ex_plot_real_time <- ggplot(ex) + + geom_segment(aes(x = transplant, xend = death, y = factor(id), + yend = factor(id)), color = "grey50", linewidth = 1) + + geom_point(aes(x = transplant, y = factor(id), color = "trans"), size = 3) + + geom_point(aes(x = death, y = factor(id), color = "death"), size = 3) + + scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D")) + + scale_y_discrete(limits = rev) + + labs(x = "Year", y = "Subject ID", color = "Event") + + theme_minimal() +``` + +```{r} +ex_table_1 +ex_plot_real_time +``` + +--- + +We then calculate time to death for each respondents. + +With this, all we need is to fit a linear model `t ~ X` or `log(t) ~ X` + +```{r} +ex_t <- ex |> + mutate( + t = death - transplant + ) + +ex_table_2 <- ex_t |> + gt() |> + cols_label( + id = "ID", + transplant = "Transplant", + death = "Death", + t = "Time to death" + ) |> + cols_align(align = "center", columns = everything()) + +ex_plot_uniform_time <- ggplot(ex_t) + + geom_segment(aes(x = 0, xend = t, y = factor(id), + yend = factor(id)), color = "grey50", linewidth = 1) + + geom_point(aes(x = 0, y = factor(id), color = "trans"), size = 3) + + geom_point(aes(x = t, y = factor(id), color = "death"), size = 3) + + scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D")) + + scale_y_discrete(limits = rev) + + labs(x = "Year", y = "Subject ID", color = "Event") + + theme_minimal() + +``` + + +```{r} +ex_table_2 +ex_plot_uniform_time +``` + +--- + +What if we do not know when exactly some respondents die? + +Scenario 1: the study ends at the year 2008? + +```{r} +ex_t_c <- ex_t |> + mutate( + death_censored = if_else(death <= 2008, death, 2008), + death_censored_txt = if_else(death <= 2008, as.character(death), "> 2008"), + status = if_else(death <= 2008, 1, 0), + t_1 = death_censored - transplant, + t_1_txt = if_else(death <= 2008, as.character(t_1), + paste(">", as.character(t_1))) + ) +``` + +```{r} +ex_table_3 <- ex_t_c |> + select(id, transplant, death_censored_txt, t_1_txt) |> + gt() |> + cols_label( + id = "ID", + transplant = "Transplant", + death_censored_txt = "Death", + t_1_txt = "Time to death" + ) |> + cols_align(align = "center", columns = everything()) + +ex_plot_real_time_1 <- ggplot(ex_t_c) + + geom_segment(aes(x = transplant, xend = death_censored, y = factor(id), + yend = factor(id)), color = "grey50", linewidth = 1) + + geom_point(aes(x = transplant, y = factor(id), color = "trans"), size = 3) + + geom_point(aes(x = death_censored, y = factor(id), color = + if_else(status == 1, "death", "censored")), size = 3) + + scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D", + "censored" = "orange")) + + scale_y_discrete(limits = rev) + + labs(x = "Year", y = "Subject ID", color = "Event") + + xlim(2000, 2016) + + geom_vline(xintercept = 2008, linetype = "dashed", color = "orange", + linewidth = 0.8) + + theme_minimal() + + +ex_plot_uniform_time_1 <- ggplot(ex_t_c) + + geom_segment(aes(x = 0, xend = t_1, y = factor(id), + yend = factor(id)), color = "grey50", linewidth = 1) + + geom_point(aes(x = 0, y = factor(id), color = "trans"), size = 3) + + geom_point(aes(x = t_1, y = factor(id), color = + if_else(status == 1, "death", "censored")), size = 3) + + scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D", + "censored" = "orange")) + + scale_y_discrete(limits = rev) + + labs(x = "Year", y = "Subject ID", color = "Event") + + theme_minimal() +``` + + +```{r} +ex_table_3 +ex_plot_real_time_1 +ex_plot_uniform_time_1 +``` + +**Right Censoring**: only observe the event (death) if it occurs before a +certain time (2008). + +--- + +Scenario 2: respondent 3 move away; loss follow up + +```{r} +ex_alt <- ex_t_c +ex_alt$death_censored_txt[3] <- "> 2005" +ex_alt$status[3] <- 0 +ex_alt$t_1_txt[3] <- "> 4" + + +ex_table_4 <- ex_alt |> + select(id, transplant, death_censored_txt, t_1_txt) |> + gt() |> + cols_label( + id = "ID", + transplant = "Transplant", + death_censored_txt = "Death", + t_1_txt = "Time to death" + ) |> + cols_align(align = "center", columns = everything()) + +ex_plot_real_time_2 <- ggplot(ex_alt) + + geom_segment(aes(x = transplant, xend = death_censored, y = factor(id), + yend = factor(id)), color = "grey50", linewidth = 1) + + geom_point(aes(x = transplant, y = factor(id), color = "trans"), size = 3) + + geom_point(aes(x = death_censored, y = factor(id), color = + if_else(status == 1, "death", "censored")), size = 3) + + scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D", + "censored" = "orange")) + + scale_y_discrete(limits = rev) + + labs(x = "Year", y = "Subject ID", color = "Event") + + xlim(2000, 2016) + + geom_vline(xintercept = 2008, linetype = "dashed", color = "orange", + linewidth = 0.8) + + theme_minimal() + + +ex_plot_uniform_time_2 <- ggplot(ex_alt) + + geom_segment(aes(x = 0, xend = t_1, y = factor(id), + yend = factor(id)), color = "grey50", linewidth = 1) + + geom_point(aes(x = 0, y = factor(id), color = "trans"), size = 3) + + geom_point(aes(x = t_1, y = factor(id), color = + if_else(status == 1, "death", "censored")), size = 3) + + scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D", + "censored" = "orange")) + + scale_y_discrete(limits = rev) + + labs(x = "Year", y = "Subject ID", color = "Event") + + theme_minimal() +``` + +```{r} +ex_table_4 +ex_plot_real_time_2 +ex_plot_uniform_time_2 +``` + +--- + +How many patients are right censored? + +```{r} +dat |> + mutate(Overall = "Overall") |> + pivot_longer( + cols = c(Overall, sex, tx.type), + names_to = "Attribute", + values_to = "Category" + ) |> + count(Attribute, Category, death) |> + ggplot(aes(x = Category, y = n, fill = factor(death))) + + geom_col(position = "dodge") + + facet_wrap(~ Attribute, scales = "free_x") + + labs(x = "Group", y = "Count", fill = "Death Status") + + theme_minimal() +``` + +## Challenges in Survival Analysis (L) + +- **Right Censoring**: only observe the event if it occurs before a certain + time. +- **Left Censoring**: event has occurred prior to the start of a research + - Follow up every 3 years. + - Event has occurred -> event happened sometime before follow up. +- **Left Truncation**: delayed entry; respondents are included only if they + survived long enough. + - Start follow up with patients 100 days after the transplant + - Patients dies within 100 day wouldn't be included in the dataset +- **Right Truncation**: respondents are included only if they have already + experienced the event. + - Retrospective analysis from deceased patients between 1990 and 2000. + - Patients not dead before 2000 are not included in the dataset. + +# Method & Result + +## Explain Dataset (M) + +## Table 1 (L) + + +### HLA match `hla.match` + +```{r} +dat$hla.match |> table(useNA = "always") +``` + +```{r} +ggplot(dat, aes(x = hla.match)) + + geom_bar() + + labs(x = "HLA match", y = "Count") + + theme_minimal() + +``` + +```{r} +ggplot(dat, aes(x = hla.match, fill = tx.type)) + + geom_bar(position = "dodge") + + labs(x = "HLA match", y = "Count") + + theme_minimal() +``` + +### Donor age `age.donor` + +```{r} +#| echo: true +mean(dat$age.donor, na.rm = TRUE) +median(dat$age.donor, na.rm = TRUE) +``` + +```{r} +ggplot(dat, aes(x = age.donor)) + + geom_bar() + + labs(x = "Donor Age", y = "Count") + + theme_minimal() +``` + +```{r} +#| fig-width: 8 +#| fig-height: 12 +ggplot(dat, aes(x = age.donor)) + + geom_bar() + + labs(x = "Donor Age", y = "Count") + + theme_minimal() + + facet_grid(hla.match ~ .) +``` + +```{r} +ggplot(dat, aes(x = age.donor, color = hla.match)) + + geom_density() + + labs(x = "Donor Age", y = "Count") + + theme_minimal() +``` + + +### Recipient Age `age.rec` + +```{r} +#| echo: true +mean(dat$age.rec, na.rm = TRUE) +median(dat$age.rec, na.rm = TRUE) +``` + + +```{r} +ggplot(dat, aes(x = age.rec)) + + geom_bar() + + labs(x = "Recipient Age", y = "Count") + + theme_minimal() + +``` + +```{r} +ggplot(dat, aes(x = as.factor(age.rec), y = age.donor)) + + geom_boxplot() + + theme_minimal() + +``` + +```{r} +ggplot(dat, aes(x = age.rec, fill = tx.type)) + + geom_bar(position = "dodge") + + theme_minimal() + +``` + +```{r} +ggplot(dat, aes(x = age.rec, color = hla.match)) + + geom_density() + + theme_minimal() +``` + +```{r} +ggplot(dat, aes(x = as.factor(age.rec), y = cold.isc)) + + geom_boxplot() + + theme_minimal() +``` + + +### Cold Ischemia Time `cold.isc` + +```{r} +#| echo: true +mean(dat$cold.isc, na.rm = TRUE) +median(dat$cold.isc, na.rm = TRUE) +``` + +```{r} +ggplot(dat, aes(x = cold.isc)) + + geom_density() + + labs(x = "Cold Ischemia Time (hours)", y = "Probability Density") + + theme_minimal() +``` + +```{r} +ggplot(dat, aes(x = cold.isc, color = tx.type, group = tx.type)) + + geom_density() + + labs(x = "Cold Ischemia Time (hours)", y = "Probability Density") + + theme_minimal() +``` + +### Transplant Type `tx.type` + +```{r} +#| echo: true +dat$tx.type |> table() +``` + +```{r} +ggplot(dat, aes(x = tx.type)) + + geom_bar() + + labs(x = "Transplant Type", y = "Count") + + theme_minimal() + +``` + +### Year `year` + +```{r} +dat$year |> table() +``` + +```{r} +ggplot(dat, aes(x = year)) + + geom_bar() + + labs(x = "Year", y = "Count") + + theme_minimal() +``` + +```{r} +ggplot(dat, aes(x = year, y = follow.up)) + + geom_boxplot() +``` + +```{r} +ggplot(dat, aes(x = year, fill = tx.type)) + + geom_bar(position = "dodge") +``` + +```{r} +ggplot(dat, aes(x = year, y = age.rec)) + + geom_boxplot() +``` + +```{r} +ggplot(dat, aes(x = year, y = age.donor)) + + geom_boxplot() +``` + +## Overall Kaplan-Meier + +```{r} +km_all <- survfit(Surv(follow.up, death) ~ 1, data = dat) +``` + +```{r} +km_all |> + ggsurvfit(type = "survival") + + add_confidence_interval() + + scale_y_continuous(limits = c(0, 1), labels = scales::label_percent()) + + labs( + x = "Years of Follow-up", + y = "Overall Survival Probability" + ) + + theme_minimal() +``` + +## Hazard Functions + +```{r} +get.life.table <- function(dat, time.intervals) { + n.pop <- nrow(dat) + + dat |> + recode.dat(time.intervals) |> + group_by(fu.interval) |> + summarize( + n.censored = sum(.data$death == 0), + n.event = sum(.data$death), + ) |> + ungroup() |> + calculate.hazard(n.pop) +} + +get.life.table.by.groups <- function(dat, time.intervals, grps) { + grps |> + lapply(function(grp) { + dat |> + get.life.table.by.group(time.intervals, grp) |> + mutate( + grp.name = grp, + grp.value = pick(1)[[1]] + ) |> + select(-1) + }) |> + bind_rows() +} + +get.life.table.by.group <- function(dat, time.intervals, grp) { + dat |> + recode.dat(time.intervals) |> + group_by(fu.interval, .data[[grp]]) |> + summarize( + n.censored = sum(.data$death == 0), + n.event = sum(.data$death), + .groups = "keep" + ) |> + ungroup(fu.interval) |> + group_modify(function(df.sub, grp) { + grp.name <- names(grp) + grp.value <- grp[[1]] + n.pop <- (dat[[grp.name]] == grp.value) |> sum() + calculate.hazard(df.sub, n.pop) + }) |> + ungroup() +} + +calculate.hazard <- function(life.table, n.pop) { + n.removed <- life.table$n.event + life.table$n.censored + n.removed.accum <- c(0, cumsum(n.removed)[-length(n.removed)]) + life.table |> + mutate( + n.at.risk = n.pop - n.removed.accum, + # TODO: how to account for censored? How do we adjust for uneven interval? + hazard.rate = n.event / n.at.risk + ) +} + +recode.dat <- function(dat, time.intervals) { + df <- dat[dat$follow.up <= sum(time.intervals), ] + time.points <- cumsum(time.intervals) + df$fu.interval <- sapply(df$follow.up, function(time) { + time.points[time <= time.points][1] + }) + + df +} +``` + +### `death` Distribution (?) + +```{r} +dat |> + mutate( + accum.death = cumsum(death), + accum.censored = if_else(death == 1, 0, 1) |> cumsum() + ) |> + ggplot() + + geom_step(aes(x = follow.up, y = accum.death)) +``` + +```{r} + +``` + +```{r} +plot.death = dat[dat$death == 1,] |> + ggplot(aes(x = follow.up)) + + geom_histogram(bins = 50) +plot.death +``` + + +### Overall (?) + +```{r} +time.intervals <- c(1/3, 1/3, 1/3, 1, 1, 1, 1) +get.life.table(dat, time.intervals) +``` + +### `tx.type` (M) + + +```{r} + +``` + + +## Cox Model + +### `age` (continuous) (L) + +### `age` (categorical) (M) + +### Full model (L) + +```{r} +m1 <- coxph(Surv(follow.up, death) ~ hla.match + tx.type, data = dat) +summary(m1) + +m2 <- coxph(Surv(follow.up, death) ~ hla.match * tx.type, data = dat) +summary(m2) + +anova(m1, m2, test = "LRT") +``` + +### Discussion: include `year` or not? + +## Assumption Testing + +# Conclusion (M + L)