diff --git a/.lintr b/.lintr index a2491c9..de689e4 100644 --- a/.lintr +++ b/.lintr @@ -1,5 +1,6 @@ linters: lintr::linters_with_defaults( object_name_linter = lintr::object_name_linter( styles = c("snake_case", "dotted.case") - ) + ), + trailing_whitespace_linter = NULL ) diff --git a/report.Rmd b/report.Rmd index f51fe1b..d411108 100644 --- a/report.Rmd +++ b/report.Rmd @@ -1,733 +1,733 @@ ---- -title: foo -execute: - cache: true - freeze: auto -number-sections: true ---- - -```{r} -library(tidyverse) -library(dplyr) -library(ggplot2) -library(survival) -library(emmeans) -library(foreign) -library(gtsummary) -library(ggsurvfit) -``` - -```{r} -dat <- read.csv("./unos.txt", sep = "\t") -head(dat) -names(dat) <- c("hla.match", "age.donor", "age.rec", "cold.isc", "death", - "year", "sex", "tx.type", "follow.up") -``` - -Complete case - -```{r} -nrow(dat) -``` - - -```{r} -sapply(names(dat), function(col) { - is.na(dat[, col]) |> sum() -}) -``` - - - -# Exercise - -## Exercise 1 - -> Illustrate in a table the characteristics of the population (age, sex, race, -donor, . . . ). - -```{r} -hist(dat$age.donor) -hist(dat$age.rec) -hist(dat$cold.isc) -hist(dat$year) -hist(dat$follow.up) - -unique(dat$hla.match) -unique(dat$year) -``` -Change the order!! - -```{r} -dat.table1 <- dat |> - mutate( - sex = factor(sex, levels = c(0,1), labels = c("Female","Male")), - tx.type = factor(tx.type, levels = c(0,1), labels = c("Cadaveric","Living")), - hla.match = factor(hla.match), - year = factor(year) - ) -``` - -```{r} -table1 <- dat.table1 |> - select(hla.match, age.donor, age.rec, cold.isc, year, sex, tx.type) |> - tbl_summary( - statistic = list( - all_continuous() ~ "{median} ({p25}, {p75})" - ), - label = list( - hla.match ~ "HLA matches, n(%)", - age.donor ~ "Donor age, median (IQR)", - age.rec ~ "Recipient age, median (IQR)", - cold.isc ~ "Cold ischemic time (hours), median (IQR), ", - year ~ "Year of transplant", - sex ~ "Sex, n(%)", - tx.type ~ "Transplant type, n(%)" - ), - missing = "ifany" - ) |> - modify_footnote(all_stat_cols() ~ NA) - -table1 -``` - -```{r} -table.split <- dat.table1 |> - select(hla.match, age.donor, age.rec, cold.isc, year, sex, tx.type) |> - tbl_summary( - by = tx.type, - statistic = list( - all_continuous() ~ "{median} ({p25}, {p75})" - ), - label = list( - hla.match ~ "HLA matches, n(%)", - age.donor ~ "Donor age, median (IQR)", - age.rec ~ "Recipient age, median (IQR)", - cold.isc ~ "Cold ischemic time (hours), median (IQR), ", - year ~ "Year of transplant", - sex ~ "Sex, n(%)" - ), - missing = "ifany" - ) |> - add_overall() |> - modify_footnote(all_stat_cols() ~ NA) - -table.split -``` - -```{r} -table.split <- dat.table1 |> - select(sex, age.rec, age.donor, hla.match, cold.isc, tx.type) |> - tbl_summary( - by = tx.type, - statistic = list( - all_continuous() ~ "{median} ({p25}, {p75})" - ), - label = list( - hla.match ~ "HLA matches, n(%)", - age.donor ~ "Donor age, median (IQR)", - age.rec ~ "Recipient age, median (IQR)", - cold.isc ~ "Cold ischemic time (hours), median (IQR), ", - sex ~ "Sex, n(%)" - ), - missing = "ifany" - ) |> - add_overall() |> - modify_footnote(all_stat_cols() ~ NA) - -table.split -``` - - -Calculate median follow-up (reverse Kaplan-Meier method), with 95% CI -```{r} -rev_km <- survfit(Surv(follow.up, death == 0) ~ 1, data = dat) - -summary(rev_km)$table - -median_followup <- summary(rev_km)$table["median"] -confidence_interval <- c(summary(rev_km)$table["0.95LCL"], summary(rev_km)$table["0.95UCL"]) -median_followup -confidence_interval -``` - - -```{r} -g <- ggplot(dat) -``` - -```{r} -g + geom_point(aes(x = follow.up, y = death)) -g + geom_density(aes(x = follow.up)) - -g + geom_density(aes(x = age.donor)) -g + geom_bar(aes(x = age.rec)) - -g + geom_boxplot(aes(x = age.rec, y = age.donor, group = age.donor)) - -# dat$age.1 |> table() -``` - -```{r} -g + geom_boxplot(aes(x = year, y = follow.up, group = year)) -``` - -## Exercise 2 - -Plot the Kaplan-Meier overall survival curve for pediatric kid- -ney transplant recipients for the first 12 years after transplantation. - -!!!!!!!! just set xlim to 12, do not remove those individuals !!!!!!! - -```{r} -km <- survfit(Surv(follow.up, death) ~ 1, data = dat[dat$follow.up <= 12, ]) -plot(km) -``` - -## Exercise 3 - -We are going to compare mortality rates (hazard functions) -between children whose transplanted kidney was provided by a living donor -(in general a family member) and those whose source was recently deceased -(variable donor type: `txtype`). Use the life table method to calculate the death -rates for the first 5 years for each group (take in the first year intervals of 4 -months and then look at each year) and show the results in a table. Estimate -the hazard ratio in each time interval as the ratio between the mortality rates -in the two groups. What do you notice? - -```{r} -dat.5 <- dat[dat$follow.up <= 5, ] -head(dat.5) -classify_time_interval = function(fu) { - if (fu <= 1/3) { - return(1/3) - } else if (fu <= 2/3) { - return(2/3) - } else if (fu <= 1) { - return(1) - } - ceiling(fu) -} - -dat.5$fu.interval <- sapply(dat.5$follow.up, classify_time_interval) -table(dat.5$fu.interval) -head(dat.5) -``` - -```{r} -dat.5.life <- dat.5 |> - group_by(fu.interval) |> - summarize( - n.censored = sum(death == 0), - n.event = sum(death), - n.at.risk = nrow(dat), - ) - -for (i in 2:nrow(dat.5.life)) { - j <- i - 1 - - n.censored.pre <- dat.5.life$n.censored[j] - n.event.pre <- dat.5.life$n.event[j] - n.at.risk.pre <- dat.5.life$n.at.risk[j] - - n.at.risk <- n.at.risk.pre - n.event.pre - n.censored.pre - - dat.5.life$n.at.risk[i] <- n.at.risk -} - -print(nrow(dat)) -dat.5.life - -``` -n.removed.acc -```{r} -dat.5.life <- dat.5.life |> - mutate( - hazard.rate = n.event / n.at.risk - ) - -``` - ---- - - -```{r} -get_life_table = function(dat.sub, dat) { - dat.sub <- dat.sub |> - group_by(fu.interval) |> - summarize( - n.censored = sum(death == 0), - n.event = sum(death), - n.at.risk = nrow(dat), - ) - - for (i in 2:nrow(dat.sub)) { - j <- i - 1 - - n.censored.pre <- dat.sub$n.censored[j] - n.event.pre <- dat.sub$n.event[j] - n.at.risk.pre <- dat.sub$n.at.risk[j] - - n.at.risk <- n.at.risk.pre - n.event.pre - n.censored.pre - - dat.sub$n.at.risk[i] <- n.at.risk - } - - dat.sub <- dat.sub |> - mutate( - hazard.rate = n.event / n.at.risk - ) - - return(dat.sub) -} -``` - -```{r} -dat.5.tx0 = dat.5[dat.5$tx.type == 0, ] -dat.5.tx1 = dat.5[dat.5$tx.type == 1, ] -``` - -```{r} -tx0.life <- get_life_table(dat.5.tx0, dat[dat$tx.type == 0, ]) -tx0.life -``` - -```{r} -tx1.life <- get_life_table(dat.5.tx1, dat[dat$tx.type == 1, ]) -tx1.life -``` - - -```{r} -tx1.life$hazard.rate / tx0.life$hazard.rate -``` - -```{r} -hazard.df <- data.frame( - fu.interval = tx1.life$fu.interval, - hazard.rate.0 = tx0.life$hazard.rate, - hazard.rate.1 = tx1.life$hazard.rate, - hazard.ratio = tx1.life$hazard.rate / tx0.life$hazard.rate -) - -ggplot(hazard.df, aes(x = fu.interval)) + - geom_line(aes(y = hazard.rate.0), color = "blue") + - geom_line(aes(y = hazard.rate.1), color = "orange") - -ggplot(hazard.df, aes(x = fu.interval)) + - geom_line(aes(y = hazard.ratio)) - -``` - ---- - -General Solution: - -```{r} -get.life.table <- function(dat, time.intervals) { - n.pop <- nrow(dat) - - dat |> - recode.dat(time.intervals) |> - group_by(fu.interval) |> - summarize( - n.censored = sum(.data$death == 0), - n.event = sum(.data$death), - ) |> - ungroup() |> - calculate.hazard(n.pop) -} - -get.life.table.by.groups <- function(dat, time.intervals, grps) { - grps |> - lapply(function(grp) { - dat |> - get.life.table.by.group(time.intervals, grp) |> - mutate( - grp.name = grp, - grp.value = pick(1)[[1]] - ) |> - select(-1) - }) |> - bind_rows() -} - -get.life.table.by.group <- function(dat, time.intervals, grp) { - dat |> - recode.dat(time.intervals) |> - group_by(fu.interval, .data[[grp]]) |> - summarize( - n.censored = sum(.data$death == 0), - n.event = sum(.data$death), - .groups = "keep" - ) |> - ungroup(fu.interval) |> - group_modify(function(df.sub, grp) { - grp.name <- names(grp) - grp.value <- grp[[1]] - n.pop <- (dat[[grp.name]] == grp.value) |> sum() - calculate.hazard(df.sub, n.pop) - }) |> - ungroup() -} - -calculate.hazard <- function(life.table, n.pop) { - n.removed <- life.table$n.event + life.table$n.censored - n.removed.accum <- c(0, cumsum(n.removed)[-length(n.removed)]) - life.table |> - mutate( - n.at.risk = n.pop - n.removed.accum, - # TODO: how to account for censored? How do we adjust for uneven interval? - hazard.rate = n.event / n.at.risk - ) -} - -recode.dat <- function(dat, time.intervals) { - df <- dat[dat$follow.up <= sum(time.intervals), ] - time.points <- cumsum(time.intervals) - df$fu.interval <- sapply(df$follow.up, function(time) { - time.points[time <= time.points][1] - }) - - df -} -``` - -```{r} -time.intervals <- c(1/3, 1/3, 1/3, 1, 1, 1, 1) -grps <- c("tx.type", "sex", "hla.match", "cold.isc") - -get.life.table.by.groups(dat, time.intervals, grps) -``` - -```{r} -grps <- c("tx.type", "sex", "hla.match", "cold.isc") -grps <- c("tx.type") - - -plot.hazard.rate.by.groups <- function(dat, time.intervals, grps) { - dat |> - get.life.table.by.groups(time.intervals, grps) |> - ggplot(aes(x = fu.interval, y = hazard.rate, - color = paste(grp.name, grp.value, sep = " = "))) + - geom_point() + - geom_line() + - labs( - color = "Group" - ) + - theme( - legend.position = "bottom" - ) -} - -``` -```{r} -time.intervals <- c(rep(1/3, 3), rep(1, 4)) -plot.hazard.rate.by.groups(dat, time.intervals, c("tx.type")) -``` - -```{r} -time.intervals <- rep(1, 5) -plot.hazard.rate.by.groups(dat, time.intervals, c("tx.type")) -``` - -```{r} -time.intervals <- rep(1/3, 15) -plot.hazard.rate.by.groups(dat, time.intervals, c("tx.type")) -``` - -```{r} -time.intervals <- c(rep(1/3, 3*3), rep(1, 2)) -plot.hazard.rate.by.groups(dat, time.intervals, c("tx.type")) -``` - -```{r} -time.intervals <- c(rep(1/4, 4), rep(1, 4)) -plot.hazard.rate.by.groups(dat, time.intervals, c("tx.type")) -``` - -```{r} -time.intervals <- c(rep(1/4, 4*2), rep(1, 3)) -plot.hazard.rate.by.groups(dat, time.intervals, c("tx.type")) -``` - -```{r} -time.intervals <- c(rep(1/4, 4*2), rep(1/3, 3*3), rep(1, 6)) -plot.hazard.rate.by.groups(dat, time.intervals, c("tx.type")) -``` - ---- - -Compare - -```{r} -time.intervals <- c(1/3, 1/3, 1/3, 1, 1, 1, 1) -get.life.table.by.groups(dat, time.intervals, grps) -``` - -```{r} -hazard.df -``` - - - -## Exercise 4 - -Show a plot with Kaplan-Meier survival curves for the two donor types. - -```{r} -km.tx <- survfit(Surv(follow.up, death) ~ tx.type, data = dat[dat$follow.up <= 12, ]) - -plot(km.tx, col = c("blue", "orange")) -legend(legend = c("cadaveric", "living"), "bottomleft", lwd = 2, col = c("blue", "orange")) -``` - -## Exercise 5 - -Fit a univariate Cox model with predictor donor type. Report -the hazard ratio and 95% confidence interval and interpret the result obtained. - -```{r} -cox.tx <- coxph(Surv(follow.up, death) ~ tx.type, data = dat) -summary(cox.tx) -``` - -```{r} -log.hazard.ratio.hat <- cox.tx$coefficient -hazard.ratio.hat <- log.hazard.ratio.hat |> exp() - -``` - -```{r} -log.hazard.ratio.se <- summary(cox.tx)$coefficients[3] -log.hazard.ratio.hat.lower.bound <- log.hazard.ratio.hat - qnorm(0.975) * log.hazard.ratio.se -log.hazard.ratio.hat.upper.bound <- log.hazard.ratio.hat + qnorm(0.975) * log.hazard.ratio.se - -c(log.hazard.ratio.hat.lower.bound, log.hazard.ratio.hat.upper.bound) |> exp() -``` - -::: {.callout-note} -Patients receiving kidney transplants from living donors (`tx.type = 1`) has -1.91 higher hazard rate than those receiving from cadaveric donors -(`tx.type = 0`). - -Since 1 is not in the 95% confidence interval, the result is statistically -significant. -::: - - -# Exercise 6 - -Research shows that an important determinant of mortality -after kidney transplant is the age of the recipient. Fit a Cox model with age -as predictor and estimate the hazard ratio and its confidence interval. Consider -age first as continuous variable and then divide into categories. - -## Continuous - -```{r} -cox.age.rec <- coxph(Surv(follow.up, death) ~ age.rec, data = dat) -cox.age.rec |> summary() -``` - -## Categorical (Case 1) - -```{r} -recode.age <- function(dat, ages) { - df$age.group <- sapply(df$follow.up, function(time) { - time.points[time <= time.points][1] - }) - - df -} -``` - -```{r} -dat.age.rec <- dat[!is.na(dat$age.rec), ] -class.age <- function(age) { - if (age < 2) { - return("0-1") - } else if (age < 6) { - return("2-5") - } else if (age < 11) { - return("6-10") - } else if (age < 19) { - return("11-18") - } -} -dat.age.rec$age.group <- sapply(dat.age.rec$age.rec, class.age) |> - factor(levels = c( - "0-1", - "2-5", - "6-10", - "11-18" - )) - - -cox.age.rec <- coxph(Surv(follow.up, death) ~ age.group, data = dat.age.rec) -``` - -```{r} -emmeans(cox.age.rec, pairwise ~ age.group, type = "response") -``` - - - -# Exercise 7 - -Fit a multivariate Cox model by using other predictors and -describe your results. - - -```{r} -sapply(names(dat), function(col) { - is.na(dat[, col]) |> sum() -}) -``` - -```{r} -names(dat) -``` - -```{r} -Waldtest <- function(cox.fit, i.betas){ - q <- length(i.betas) - coefs <- cox.fit$coefficients - # print(cox.fit$var) - var <- cox.fit$var[i.betas, i.betas] - Wald <- coefs[i.betas]%*%solve(var)%*%coefs[i.betas] - p.value <- 1-pchisq(as.numeric(Wald), df = q) - return(cbind(Wald, p.value)) -} -``` - - -```{r} -fit.cox.model <- function(surv, i.betas) { - fit.tx <- coxph(surv, data = dat) - Waldtest(fit.tx, i.betas) -} -``` - -Start with `tx.type` - -```{r} -surv <- Surv(follow.up, death) ~ tx.type + age.rec -fit.cox.model(surv, 1:2) # NOTE: Why is this 1??? -``` - -Iteration 1: `tx.type + age + ?` - -```{r} -names(dat) -``` - -```{r} -surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match -fit.cox.model(surv, 3) -surv <- Surv(follow.up, death) ~ tx.type + age.rec + age.donor -fit.cox.model(surv, 3) -surv <- Surv(follow.up, death) ~ tx.type + age.rec + cold.isc -fit.cox.model(surv, 3) -surv <- Surv(follow.up, death) ~ tx.type + age.rec + year -fit.cox.model(surv, 3) -surv <- Surv(follow.up, death) ~ tx.type + age.rec + sex -fit.cox.model(surv, 3) -``` - -Add `hla.match` - -Iteration 2: `tx.type + age.rec + hla.match + ?` - -```{r} -surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match + age.donor -fit.cox.model(surv, 4) -surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match + cold.isc -fit.cox.model(surv, 4) -surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match + year -fit.cox.model(surv, 4) -surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match + sex -fit.cox.model(surv, 4) -``` - -Add `age.donor` - -Iteration 2: `tx.type + age.rec + hla.match + age.donor + ?` - -```{r} -surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match + age.donor + - cold.isc -fit.cox.model(surv, 5) -surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match + age.donor + - year -fit.cox.model(surv, 5) -surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match + age.donor + - sex -fit.cox.model(surv, 5) -``` - -Add `year` - -```{r} -surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match + age.donor + - year + cold.isc -fit.cox.model(surv, 6) -surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match + age.donor + - year + sex -fit.cox.model(surv, 6) -``` - -```{r} - -``` - - -Next, we test for each remaining variables - -# Exercise 8 - -Estimate the survival function for specific covariate patterns. -Based on the previous results choose the best predictors. - -Exercise 9 — Check the proportional hazards assumption. You may use the -function cox.zph. Discuss the result and possible implications. - -H0: PH-assumption holds -H1: PH-assumption doesn't hold - -```{r} -# different cox model!! -cox.final <- coxph(Surv(follow.up, death) ~ tx.type + age.rec + hla.match + age.donor, data = dat) -cox.zph(cox.final) -cox.final -``` - - -Exercise 10 — Plot the Schoenfeld residuals and comment. -A non-random pattern or slope in a plot of scaled residuals against time indicates a violation, suggesting the covariate's impact changes, while a horizontal line around zero supports the assumption. - -```{r} -plot(cox.zph(cox.final)) -``` - -```{r, fig.width=10, fig.height=10} -plot(cox.zph(cox.final)) -``` - - - - -NOTES: -left censoring: event happened before time zero -> not applicable -left truncation: not applicable?? -right censoring: we saw this in the follow up times, administrative censoring high - -! No left truncation, since our target population is patients who received a -kidney transplant. So we don't care about patients who died before receiving -a transplant. - -! ADD: we used a Cox model but we already saw that the PH assumption doesn't hold ! - -? remove year from prediction model, since it is not a patient characteristic and may not be stable for future predictions. It mainly captures changes in clinical practice rather than individual risk. - - +--- +title: "foo" +execute: + cache: true + freeze: auto +number-sections: true +--- + +```{r} +library(tidyverse) +library(dplyr) +library(ggplot2) +library(survival) +library(emmeans) +library(foreign) +library(gtsummary) +library(ggsurvfit) +``` + +```{r} +dat <- read.csv("./unos.txt", sep = "\t") +head(dat) +names(dat) <- c("hla.match", "age.donor", "age.rec", "cold.isc", "death", + "year", "sex", "tx.type", "follow.up") +``` + +Complete case + +```{r} +nrow(dat) +``` + + +```{r} +sapply(names(dat), function(col) { + is.na(dat[, col]) |> sum() +}) +``` + + + +# Exercise + +## Exercise 1 + +> Illustrate in a table the characteristics of the population (age, sex, race, +donor, . . . ). + +```{r} +hist(dat$age.donor) +hist(dat$age.rec) +hist(dat$cold.isc) +hist(dat$year) +hist(dat$follow.up) + +unique(dat$hla.match) +unique(dat$year) +``` +Change the order!! + +```{r} +dat.table1 <- dat |> + mutate( + sex = factor(sex, levels = c(0,1), labels = c("Female","Male")), + tx.type = factor(tx.type, levels = c(0,1), labels = c("Cadaveric","Living")), + hla.match = factor(hla.match), + year = factor(year) + ) +``` + +```{r} +table1 <- dat.table1 |> + select(hla.match, age.donor, age.rec, cold.isc, year, sex, tx.type) |> + tbl_summary( + statistic = list( + all_continuous() ~ "{median} ({p25}, {p75})" + ), + label = list( + hla.match ~ "HLA matches, n(%)", + age.donor ~ "Donor age, median (IQR)", + age.rec ~ "Recipient age, median (IQR)", + cold.isc ~ "Cold ischemic time (hours), median (IQR), ", + year ~ "Year of transplant", + sex ~ "Sex, n(%)", + tx.type ~ "Transplant type, n(%)" + ), + missing = "ifany" + ) |> + modify_footnote(all_stat_cols() ~ NA) + +table1 +``` + +```{r} +table.split <- dat.table1 |> + select(hla.match, age.donor, age.rec, cold.isc, year, sex, tx.type) |> + tbl_summary( + by = tx.type, + statistic = list( + all_continuous() ~ "{median} ({p25}, {p75})" + ), + label = list( + hla.match ~ "HLA matches, n(%)", + age.donor ~ "Donor age, median (IQR)", + age.rec ~ "Recipient age, median (IQR)", + cold.isc ~ "Cold ischemic time (hours), median (IQR), ", + year ~ "Year of transplant", + sex ~ "Sex, n(%)" + ), + missing = "ifany" + ) |> + add_overall() |> + modify_footnote(all_stat_cols() ~ NA) + +table.split +``` + +```{r} +table.split <- dat.table1 |> + select(sex, age.rec, age.donor, hla.match, cold.isc, tx.type) |> + tbl_summary( + by = tx.type, + statistic = list( + all_continuous() ~ "{median} ({p25}, {p75})" + ), + label = list( + hla.match ~ "HLA matches, n(%)", + age.donor ~ "Donor age, median (IQR)", + age.rec ~ "Recipient age, median (IQR)", + cold.isc ~ "Cold ischemic time (hours), median (IQR), ", + sex ~ "Sex, n(%)" + ), + missing = "ifany" + ) |> + add_overall() |> + modify_footnote(all_stat_cols() ~ NA) + +table.split +``` + + +Calculate median follow-up (reverse Kaplan-Meier method), with 95% CI +```{r} +rev_km <- survfit(Surv(follow.up, death == 0) ~ 1, data = dat) + +summary(rev_km)$table + +median_followup <- summary(rev_km)$table["median"] +confidence_interval <- c(summary(rev_km)$table["0.95LCL"], summary(rev_km)$table["0.95UCL"]) +median_followup +confidence_interval +``` + + +```{r} +g <- ggplot(dat) +``` + +```{r} +g + geom_point(aes(x = follow.up, y = death)) +g + geom_density(aes(x = follow.up)) + +g + geom_density(aes(x = age.donor)) +g + geom_bar(aes(x = age.rec)) + +g + geom_boxplot(aes(x = age.rec, y = age.donor, group = age.donor)) + +# dat$age.1 |> table() +``` + +```{r} +g + geom_boxplot(aes(x = year, y = follow.up, group = year)) +``` + +## Exercise 2 + +Plot the Kaplan-Meier overall survival curve for pediatric kid- +ney transplant recipients for the first 12 years after transplantation. + +!!!!!!!! just set xlim to 12, do not remove those individuals !!!!!!! + +```{r} +km <- survfit(Surv(follow.up, death) ~ 1, data = dat[dat$follow.up <= 12, ]) +plot(km) +``` + +## Exercise 3 + +We are going to compare mortality rates (hazard functions) +between children whose transplanted kidney was provided by a living donor +(in general a family member) and those whose source was recently deceased +(variable donor type: `txtype`). Use the life table method to calculate the death +rates for the first 5 years for each group (take in the first year intervals of 4 +months and then look at each year) and show the results in a table. Estimate +the hazard ratio in each time interval as the ratio between the mortality rates +in the two groups. What do you notice? + +```{r} +dat.5 <- dat[dat$follow.up <= 5, ] +head(dat.5) +classify_time_interval = function(fu) { + if (fu <= 1/3) { + return(1/3) + } else if (fu <= 2/3) { + return(2/3) + } else if (fu <= 1) { + return(1) + } + ceiling(fu) +} + +dat.5$fu.interval <- sapply(dat.5$follow.up, classify_time_interval) +table(dat.5$fu.interval) +head(dat.5) +``` + +```{r} +dat.5.life <- dat.5 |> + group_by(fu.interval) |> + summarize( + n.censored = sum(death == 0), + n.event = sum(death), + n.at.risk = nrow(dat), + ) + +for (i in 2:nrow(dat.5.life)) { + j <- i - 1 + + n.censored.pre <- dat.5.life$n.censored[j] + n.event.pre <- dat.5.life$n.event[j] + n.at.risk.pre <- dat.5.life$n.at.risk[j] + + n.at.risk <- n.at.risk.pre - n.event.pre - n.censored.pre + + dat.5.life$n.at.risk[i] <- n.at.risk +} + +print(nrow(dat)) +dat.5.life + +``` +n.removed.acc +```{r} +dat.5.life <- dat.5.life |> + mutate( + hazard.rate = n.event / n.at.risk + ) + +``` + +--- + + +```{r} +get_life_table = function(dat.sub, dat) { + dat.sub <- dat.sub |> + group_by(fu.interval) |> + summarize( + n.censored = sum(death == 0), + n.event = sum(death), + n.at.risk = nrow(dat), + ) + + for (i in 2:nrow(dat.sub)) { + j <- i - 1 + + n.censored.pre <- dat.sub$n.censored[j] + n.event.pre <- dat.sub$n.event[j] + n.at.risk.pre <- dat.sub$n.at.risk[j] + + n.at.risk <- n.at.risk.pre - n.event.pre - n.censored.pre + + dat.sub$n.at.risk[i] <- n.at.risk + } + + dat.sub <- dat.sub |> + mutate( + hazard.rate = n.event / n.at.risk + ) + + return(dat.sub) +} +``` + +```{r} +dat.5.tx0 = dat.5[dat.5$tx.type == 0, ] +dat.5.tx1 = dat.5[dat.5$tx.type == 1, ] +``` + +```{r} +tx0.life <- get_life_table(dat.5.tx0, dat[dat$tx.type == 0, ]) +tx0.life +``` + +```{r} +tx1.life <- get_life_table(dat.5.tx1, dat[dat$tx.type == 1, ]) +tx1.life +``` + + +```{r} +tx1.life$hazard.rate / tx0.life$hazard.rate +``` + +```{r} +hazard.df <- data.frame( + fu.interval = tx1.life$fu.interval, + hazard.rate.0 = tx0.life$hazard.rate, + hazard.rate.1 = tx1.life$hazard.rate, + hazard.ratio = tx1.life$hazard.rate / tx0.life$hazard.rate +) + +ggplot(hazard.df, aes(x = fu.interval)) + + geom_line(aes(y = hazard.rate.0), color = "blue") + + geom_line(aes(y = hazard.rate.1), color = "orange") + +ggplot(hazard.df, aes(x = fu.interval)) + + geom_line(aes(y = hazard.ratio)) + +``` + +--- + +General Solution: + +```{r} +get.life.table <- function(dat, time.intervals) { + n.pop <- nrow(dat) + + dat |> + recode.dat(time.intervals) |> + group_by(fu.interval) |> + summarize( + n.censored = sum(.data$death == 0), + n.event = sum(.data$death), + ) |> + ungroup() |> + calculate.hazard(n.pop) +} + +get.life.table.by.groups <- function(dat, time.intervals, grps) { + grps |> + lapply(function(grp) { + dat |> + get.life.table.by.group(time.intervals, grp) |> + mutate( + grp.name = grp, + grp.value = pick(1)[[1]] + ) |> + select(-1) + }) |> + bind_rows() +} + +get.life.table.by.group <- function(dat, time.intervals, grp) { + dat |> + recode.dat(time.intervals) |> + group_by(fu.interval, .data[[grp]]) |> + summarize( + n.censored = sum(.data$death == 0), + n.event = sum(.data$death), + .groups = "keep" + ) |> + ungroup(fu.interval) |> + group_modify(function(df.sub, grp) { + grp.name <- names(grp) + grp.value <- grp[[1]] + n.pop <- (dat[[grp.name]] == grp.value) |> sum() + calculate.hazard(df.sub, n.pop) + }) |> + ungroup() +} + +calculate.hazard <- function(life.table, n.pop) { + n.removed <- life.table$n.event + life.table$n.censored + n.removed.accum <- c(0, cumsum(n.removed)[-length(n.removed)]) + life.table |> + mutate( + n.at.risk = n.pop - n.removed.accum, + # TODO: how to account for censored? How do we adjust for uneven interval? + hazard.rate = n.event / n.at.risk + ) +} + +recode.dat <- function(dat, time.intervals) { + df <- dat[dat$follow.up <= sum(time.intervals), ] + time.points <- cumsum(time.intervals) + df$fu.interval <- sapply(df$follow.up, function(time) { + time.points[time <= time.points][1] + }) + + df +} +``` + +```{r} +time.intervals <- c(1/3, 1/3, 1/3, 1, 1, 1, 1) +grps <- c("tx.type", "sex", "hla.match", "cold.isc") + +get.life.table.by.groups(dat, time.intervals, grps) +``` + +```{r} +grps <- c("tx.type", "sex", "hla.match", "cold.isc") +grps <- c("tx.type") + + +plot.hazard.rate.by.groups <- function(dat, time.intervals, grps) { + dat |> + get.life.table.by.groups(time.intervals, grps) |> + ggplot(aes(x = fu.interval, y = hazard.rate, + color = paste(grp.name, grp.value, sep = " = "))) + + geom_point() + + geom_line() + + labs( + color = "Group" + ) + + theme( + legend.position = "bottom" + ) +} + +``` +```{r} +time.intervals <- c(rep(1/3, 3), rep(1, 4)) +plot.hazard.rate.by.groups(dat, time.intervals, c("tx.type")) +``` + +```{r} +time.intervals <- rep(1, 5) +plot.hazard.rate.by.groups(dat, time.intervals, c("tx.type")) +``` + +```{r} +time.intervals <- rep(1/3, 15) +plot.hazard.rate.by.groups(dat, time.intervals, c("tx.type")) +``` + +```{r} +time.intervals <- c(rep(1/3, 3*3), rep(1, 2)) +plot.hazard.rate.by.groups(dat, time.intervals, c("tx.type")) +``` + +```{r} +time.intervals <- c(rep(1/4, 4), rep(1, 4)) +plot.hazard.rate.by.groups(dat, time.intervals, c("tx.type")) +``` + +```{r} +time.intervals <- c(rep(1/4, 4*2), rep(1, 3)) +plot.hazard.rate.by.groups(dat, time.intervals, c("tx.type")) +``` + +```{r} +time.intervals <- c(rep(1/4, 4*2), rep(1/3, 3*3), rep(1, 6)) +plot.hazard.rate.by.groups(dat, time.intervals, c("tx.type")) +``` + +--- + +Compare + +```{r} +time.intervals <- c(1/3, 1/3, 1/3, 1, 1, 1, 1) +get.life.table.by.groups(dat, time.intervals, grps) +``` + +```{r} +hazard.df +``` + + + +## Exercise 4 + +Show a plot with Kaplan-Meier survival curves for the two donor types. + +```{r} +km.tx <- survfit(Surv(follow.up, death) ~ tx.type, data = dat[dat$follow.up <= 12, ]) + +plot(km.tx, col = c("blue", "orange")) +legend(legend = c("cadaveric", "living"), "bottomleft", lwd = 2, col = c("blue", "orange")) +``` + +## Exercise 5 + +Fit a univariate Cox model with predictor donor type. Report +the hazard ratio and 95% confidence interval and interpret the result obtained. + +```{r} +cox.tx <- coxph(Surv(follow.up, death) ~ tx.type, data = dat) +summary(cox.tx) +``` + +```{r} +log.hazard.ratio.hat <- cox.tx$coefficient +hazard.ratio.hat <- log.hazard.ratio.hat |> exp() + +``` + +```{r} +log.hazard.ratio.se <- summary(cox.tx)$coefficients[3] +log.hazard.ratio.hat.lower.bound <- log.hazard.ratio.hat - qnorm(0.975) * log.hazard.ratio.se +log.hazard.ratio.hat.upper.bound <- log.hazard.ratio.hat + qnorm(0.975) * log.hazard.ratio.se + +c(log.hazard.ratio.hat.lower.bound, log.hazard.ratio.hat.upper.bound) |> exp() +``` + +::: {.callout-note} +Patients receiving kidney transplants from living donors (`tx.type = 1`) has +1.91 higher hazard rate than those receiving from cadaveric donors +(`tx.type = 0`). + +Since 1 is not in the 95% confidence interval, the result is statistically +significant. +::: + + +# Exercise 6 + +Research shows that an important determinant of mortality +after kidney transplant is the age of the recipient. Fit a Cox model with age +as predictor and estimate the hazard ratio and its confidence interval. Consider +age first as continuous variable and then divide into categories. + +## Continuous + +```{r} +cox.age.rec <- coxph(Surv(follow.up, death) ~ age.rec, data = dat) +cox.age.rec |> summary() +``` + +## Categorical (Case 1) + +```{r} +recode.age <- function(dat, ages) { + df$age.group <- sapply(df$follow.up, function(time) { + time.points[time <= time.points][1] + }) + + df +} +``` + +```{r} +dat.age.rec <- dat[!is.na(dat$age.rec), ] +class.age <- function(age) { + if (age < 2) { + return("0-1") + } else if (age < 6) { + return("2-5") + } else if (age < 11) { + return("6-10") + } else if (age < 19) { + return("11-18") + } +} +dat.age.rec$age.group <- sapply(dat.age.rec$age.rec, class.age) |> + factor(levels = c( + "0-1", + "2-5", + "6-10", + "11-18" + )) + + +cox.age.rec <- coxph(Surv(follow.up, death) ~ age.group, data = dat.age.rec) +``` + +```{r} +emmeans(cox.age.rec, pairwise ~ age.group, type = "response") +``` + + + +# Exercise 7 + +Fit a multivariate Cox model by using other predictors and +describe your results. + + +```{r} +sapply(names(dat), function(col) { + is.na(dat[, col]) |> sum() +}) +``` + +```{r} +names(dat) +``` + +```{r} +Waldtest <- function(cox.fit, i.betas){ + q <- length(i.betas) + coefs <- cox.fit$coefficients + # print(cox.fit$var) + var <- cox.fit$var[i.betas, i.betas] + Wald <- coefs[i.betas]%*%solve(var)%*%coefs[i.betas] + p.value <- 1-pchisq(as.numeric(Wald), df = q) + return(cbind(Wald, p.value)) +} +``` + + +```{r} +fit.cox.model <- function(surv, i.betas) { + fit.tx <- coxph(surv, data = dat) + Waldtest(fit.tx, i.betas) +} +``` + +Start with `tx.type` + +```{r} +surv <- Surv(follow.up, death) ~ tx.type + age.rec +fit.cox.model(surv, 1:2) # NOTE: Why is this 1??? +``` + +Iteration 1: `tx.type + age + ?` + +```{r} +names(dat) +``` + +```{r} +surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match +fit.cox.model(surv, 3) +surv <- Surv(follow.up, death) ~ tx.type + age.rec + age.donor +fit.cox.model(surv, 3) +surv <- Surv(follow.up, death) ~ tx.type + age.rec + cold.isc +fit.cox.model(surv, 3) +surv <- Surv(follow.up, death) ~ tx.type + age.rec + year +fit.cox.model(surv, 3) +surv <- Surv(follow.up, death) ~ tx.type + age.rec + sex +fit.cox.model(surv, 3) +``` + +Add `hla.match` + +Iteration 2: `tx.type + age.rec + hla.match + ?` + +```{r} +surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match + age.donor +fit.cox.model(surv, 4) +surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match + cold.isc +fit.cox.model(surv, 4) +surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match + year +fit.cox.model(surv, 4) +surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match + sex +fit.cox.model(surv, 4) +``` + +Add `age.donor` + +Iteration 2: `tx.type + age.rec + hla.match + age.donor + ?` + +```{r} +surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match + age.donor + + cold.isc +fit.cox.model(surv, 5) +surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match + age.donor + + year +fit.cox.model(surv, 5) +surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match + age.donor + + sex +fit.cox.model(surv, 5) +``` + +Add `year` + +```{r} +surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match + age.donor + + year + cold.isc +fit.cox.model(surv, 6) +surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match + age.donor + + year + sex +fit.cox.model(surv, 6) +``` + +```{r} + +``` + + +Next, we test for each remaining variables + +# Exercise 8 + +Estimate the survival function for specific covariate patterns. +Based on the previous results choose the best predictors. + +Exercise 9 — Check the proportional hazards assumption. You may use the +function cox.zph. Discuss the result and possible implications. + +H0: PH-assumption holds +H1: PH-assumption doesn't hold + +```{r} +# different cox model!! +cox.final <- coxph(Surv(follow.up, death) ~ tx.type + age.rec + hla.match + age.donor, data = dat) +cox.zph(cox.final) +cox.final +``` + + +Exercise 10 — Plot the Schoenfeld residuals and comment. +A non-random pattern or slope in a plot of scaled residuals against time indicates a violation, suggesting the covariate's impact changes, while a horizontal line around zero supports the assumption. + +```{r} +plot(cox.zph(cox.final)) +``` + +```{r, fig.width=10, fig.height=10} +plot(cox.zph(cox.final)) +``` + + + + +NOTES: +left censoring: event happened before time zero -> not applicable +left truncation: not applicable?? +right censoring: we saw this in the follow up times, administrative censoring high + +! No left truncation, since our target population is patients who received a +kidney transplant. So we don't care about patients who died before receiving +a transplant. + +! ADD: we used a Cox model but we already saw that the PH assumption doesn't hold ! + +? remove year from prediction model, since it is not a patient characteristic and may not be stable for future predictions. It mainly captures changes in clinical practice rather than individual risk. + + diff --git a/report.html b/report.html index 8afe5ee..9a85355 100644 --- a/report.html +++ b/report.html @@ -1,2899 +1,2704 @@ - - - + + + + + + + + - foo - + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - + + + + + + + + - + -
-
+
+ + + + + +
+

Exercise 8

+

Estimate the survival function for specific covariate patterns. Based +on the previous results choose the best predictors.

+

Exercise 9 — Check the proportional hazards assumption. You may use +the function cox.zph. Discuss the result and possible implications.

H0: PH-assumption holds H1: PH-assumption doesn’t hold

-
-
# different cox model!!
-cox.final <- coxph(Surv(follow.up, death) ~ tx.type + age.rec + hla.match + age.donor, data = dat)
-cox.zph(cox.final)
-
-
          chisq df       p
-tx.type    2.56  1 0.10948
-age.rec   35.98  1 2.0e-09
-hla.match  8.22  1 0.00414
-age.donor 11.05  1 0.00089
-GLOBAL    47.37  4 1.3e-09
+
# different cox model!!
+cox.final <- coxph(Surv(follow.up, death) ~ tx.type + age.rec + hla.match + age.donor, data = dat)
+cox.zph(cox.final)
+
##           chisq df       p
+## tx.type    2.56  1 0.10948
+## age.rec   35.98  1 2.0e-09
+## hla.match  8.22  1 0.00414
+## age.donor 11.05  1 0.00089
+## GLOBAL    47.37  4 1.3e-09
+
cox.final
+
## Call:
+## coxph(formula = Surv(follow.up, death) ~ tx.type + age.rec + 
+##     hla.match + age.donor, data = dat)
+## 
+##                coef exp(coef)  se(coef)      z        p
+## tx.type    0.408215  1.504130  0.121728  3.354 0.000798
+## age.rec   -0.043724  0.957218  0.008875 -4.927 8.36e-07
+## hla.match -0.107323  0.898236  0.040127 -2.675 0.007483
+## age.donor -0.008810  0.991228  0.003859 -2.283 0.022437
+## 
+## Likelihood ratio test=85.48  on 4 df, p=< 2.2e-16
+## n= 9433, number of events= 449 
+##    (342 observations deleted due to missingness)
+

Exercise 10 — Plot the Schoenfeld residuals and comment. A non-random +pattern or slope in a plot of scaled residuals against time indicates a +violation, suggesting the covariate’s impact changes, while a horizontal +line around zero supports the assumption.

+
plot(cox.zph(cox.final))
+

+
plot(cox.zph(cox.final))
+

+

NOTES: left censoring: event happened before time zero -> not +applicable left truncation: not applicable?? right censoring: we saw +this in the follow up times, administrative censoring high

+

! No left truncation, since our target population is patients who +received a kidney transplant. So we don’t care about patients who died +before receiving a transplant.

+

! ADD: we used a Cox model but we already saw that the PH assumption +doesn’t hold !

+

? remove year from prediction model, since it is not a patient +characteristic and may not be stable for future predictions. It mainly +captures changes in clinical practice rather than individual risk.

-
-

Exercise 10 — Plot the Schoenfeld residuals and comment. A non-random pattern or slope in a plot of scaled residuals against time indicates a violation, suggesting the covariate’s impact changes, while a horizontal line around zero supports the assumption.

-
-
plot(cox.zph(cox.final))
-
-
-
-

-
-
-
-
-
-
-

-
-
-
-
-
-
-

-
-
-
-
-
-
-

-
-
-
-
-

NOTES: left censoring: event happened before time zero -> not applicable left truncation: not applicable?? right censoring: we saw this in the follow up times, administrative censoring high

-

! No left truncation, since our target population is patients who received a kidney transplant. So we don’t care about patients who died before receiving a transplant.

-

! ADD: we used a Cox model but we already saw that the PH assumption doesn’t hold !

-

? remove year from prediction model, since it is not a patient characteristic and may not be stable for future predictions. It mainly captures changes in clinical practice rather than individual risk.

- -
- - + + + + -
+}); + + + + + - - \ No newline at end of file + + diff --git a/slides.Rmd b/slides.Rmd index 8666997..75017a5 100644 --- a/slides.Rmd +++ b/slides.Rmd @@ -21,6 +21,7 @@ library(gt) library(ggsurvfit) library(gridExtra) library(survminer) +library(broom) ``` @@ -32,9 +33,9 @@ dat <- dat |> mutate( sex = factor(sex, levels = c(0,1), labels = c("Female","Male")), tx.type = factor(tx.type, levels = c(0,1), labels = c("Cadaveric","Living")), - hla.match = factor(hla.match), - year = factor(year) - ) + # hla.match = factor(hla.match), + # year = factor(year) +) ``` ```{r} @@ -244,11 +245,11 @@ plot.tx.donor # Overall Kaplan-Meier Curve ```{r} -km_all <- survfit(Surv(follow.up, death) ~ 1, data = dat) +km.all <- survfit(Surv(follow.up, death) ~ 1, data = dat) ``` ```{r, fig.width=7, fig.height=5} -km_all |> +km.all |> ggsurvfit(type = "survival") + add_confidence_interval() + scale_y_continuous(limits = c(0, 1)) + @@ -261,7 +262,74 @@ km_all |> ``` ```{r} -summary(km_all, times = c(0, 4, 8, 12)) +header_bg <- "#A83262" # burgundy/magenta from right panel +header_text <- "white" +body_bg <- "#FFF4CC" # light cream +stripe_bg <- "#F6E8C3" # slightly darker cream +text_color <- "#333333" +border_col <- "#D8C8A5" +``` + +```{r} +times <- seq(0, 12, 2) +summary(km.all, times = times)[c("time", "n.risk", "surv")] |> + as.data.frame() |> + gt() |> + tab_header( + title = "Overall Kaplan-Meier Survival Summary" + ) |> + cols_label( + time = "Follow-up Time", + n.risk = "Number at Risk", + surv = "Survival Probability" + ) |> + fmt_number( + columns = surv, + decimals = 3 + ) |> + tab_options( + table.font.size = px(15), + heading.title.font.size = px(20), + column_labels.font.weight = "bold" + ) + +``` + +```{r} +summary(km.all, times = times)[c("time", "n.risk", "surv")] |> + as.data.frame() |> + gt() |> + tab_header( + title = "Overall Kaplan-Meier Survival Summary" + ) |> + cols_label( + time = "Follow-up Time", + n.risk = "Number at Risk", + surv = "Survival Probability" + ) |> + fmt_number(columns = surv, decimals = 3) |> + fmt_number(columns = time, decimals = 0) |> + tab_style( + style = list( + cell_fill(color = "#A83262"), + cell_text(color = "white", weight = "bold") + ), + locations = cells_column_labels() + ) |> + tab_style( + style = cell_fill(color = "#FFF4CC"), + locations = cells_body() + ) |> + opt_row_striping(row_striping = TRUE) |> + tab_options( + row.striping.background_color = "#F6E8C3", + table.border.top.color = "#A83262", + table.border.bottom.color = "#A83262", + column_labels.border.bottom.color = "#A83262", + table.font.size = px(16), + heading.title.font.size = px(18), + table.background.color = "#FFF4CC" + ) ``` # Cox Model (with intervals?) @@ -434,29 +502,229 @@ eme.age.rec.group$contrast |> # Identifying Predictors -TODO: write a more generic function +```{r} +# From exercise 9 +Waldtest <- function(coxph.object, idx) { + b <- coxph.object$coef[idx] + Sig <- coxph.object$var[idx, idx] + Wald <- b %*% solve(Sig) %*% b + p <- 1 - pchisq(as.numeric(Wald), df = length(idx)) + return(data.frame(Wald = Wald, df = length(idx), p = p)) +} + +test.new.var <- function(old.vars, new.var, i=1) { + new.surv <- reformulate( + termlabels = c(old.vars, new.var), + response = "Surv(follow.up, death)" + ) + cox.fit <- coxph(new.surv, data = dat, method = "breslow") + print(cox.fit) + Waldtest(cox.fit, i) +} +``` + +## Start with nothing + +```{r, echo=TRUE} +old.vars <- c() +test.new.var(old.vars, "tx.type", 1) +test.new.var(old.vars, "hla.match", 1) +test.new.var(old.vars, "age.donor", 1) +test.new.var(old.vars, "age.rec", 1) +test.new.var(old.vars, "cold.isc", 1) +test.new.var(old.vars, "sex", 1) +test.new.var(old.vars, "year", 1) +``` +## `tx.type` + ? + +```{r, echo=TRUE} +old.vars <- c("tx.type") + +# test.new.var(old.vars, "hla.match", 2:7) +test.new.var(old.vars, "hla.match", 2) +test.new.var(old.vars, "age.donor", 2) +test.new.var(old.vars, "age.rec", 2) +test.new.var(old.vars, "cold.isc", 2) +test.new.var(old.vars, "sex", 2) +test.new.var(old.vars, "year", 2) +``` + +`age.rec` has the smallest $p$-value + +## `tx.type` + `age.rec` + ? + +```{r, echo=TRUE} +old.vars <- c("tx.type", "age.rec") + +test.new.var(old.vars, "hla.match", 3) +test.new.var(old.vars, "age.donor", 3) +test.new.var(old.vars, "cold.isc", 3) +test.new.var(old.vars, "sex", 3) +test.new.var(old.vars, "year", 3) +``` + +`hla.match` has the smallest $p$-value + +## `tx.type` + `age.rec` + `hla.match` + ? + +```{r, echo=TRUE} +old.vars <- c("tx.type", "age.rec", "hla.match") + +test.new.var(old.vars, "age.donor", 4) +test.new.var(old.vars, "cold.isc", 4) +test.new.var(old.vars, "sex", 4) +test.new.var(old.vars, "year", 4) +``` + +`age.donor` has the smallest $p$-value + +## `tx.type` + `age.rec` + `hla.match` + `age.donor` + ? + +```{r, echo=TRUE} +old.vars <- c("tx.type", "age.rec", "hla.match", "age.donor") + +test.new.var(old.vars, "cold.isc", 5) +test.new.var(old.vars, "sex", 5) +test.new.var(old.vars, "year", 5) +``` + +`year` has the smallest $p$-value + +## `tx.type` + `age.rec` + `hla.match` + `age.donor` + `year` + +```{r, echo=TRUE} +old.vars <- c("tx.type", "age.rec", "hla.match", "age.donor", "year") + +test.new.var(old.vars, "cold.isc", 6) +test.new.var(old.vars, "sex", 6) +``` + +Neither has $p$-value less than 0.05. Stop. + +# Full Model ```{r} +surv.full <- Surv(follow.up + death) ~ tx.type + age.rec + hla.match + + age.donor + year +cox.full <- coxph(surv.full, data = dat, method = "breslow") +``` + +```{r, echo=TRUE} +cox.full +``` + +```{r, fig.width=7, fig.height=5} +tidy(cox.full, exponentiate = TRUE, conf.int = TRUE) |> + mutate( + term = recode( + term, + "tx.typeLiving" = "Living donor", + "age.rec" = "Recipient age", + "hla.match" = "HLA match", + "age.donor" = "Donor age", + "year" = "Transplant year" + ) + ) |> + ggplot(aes(x = estimate, y = reorder(term, estimate))) + + geom_vline(xintercept = 1, linetype = "dashed") + + geom_errorbarh( + aes(xmin = conf.low, xmax = conf.high), + height = 0.2 + ) + + scale_x_log10() + + geom_point(size = 2) + + labs( + title = "Hazard Ratios from Full Cox Model", + x = "Hazard Ratio", + y = NULL + ) + + my.theme +``` + +## What happened to `year`? + +```{r} +ggplot(data = dat) + + geom_boxplot(aes(x = factor(year), y = follow.up)) +``` + +```{r} +ggplot(data = dat) + + geom_boxplot(aes(x = factor(year), y = follow.up)) + + facet_grid(death ~ .) +``` + +Probabily want to omit it + +## Without `year` + +```{r} +surv.full.1 <- Surv(follow.up + death) ~ tx.type + age.rec + hla.match + + age.donor +cox.full.1 <- coxph(surv.full.1, data = dat, method = "breslow") +``` + +```{r, echo=TRUE} +cox.full.1 +``` + +```{r, fig.width=7, fig.height=5} +tidy(cox.full.1, exponentiate = TRUE, conf.int = TRUE) |> + mutate( + term = recode( + term, + "tx.typeLiving" = "Living donor", + "age.rec" = "Recipient age", + "hla.match" = "HLA match", + "age.donor" = "Donor age", + ) + ) |> + ggplot(aes(x = estimate, y = reorder(term, estimate))) + + geom_vline(xintercept = 1, linetype = "dashed") + + geom_errorbarh( + aes(xmin = conf.low, xmax = conf.high), + height = 0.2 + ) + + scale_x_log10() + + geom_point(size = 2) + + labs( + title = "Hazard Ratios from Full Cox Model (Without Year)", + x = "Hazard Ratio", + y = NULL + ) + + my.theme +``` + + + + + + + + +--- + +TODO: write a more generic function (UPDATE: FAILED) + +
+ +```{r, eval=FALSE} +# WARNING: BAD, DO NOT USE. +vars <- c("hla.match", "age.donor", "age.rec", "cold.isc", "year", "sex", + "tx.type") +included <- vars == "tx.type" + Waldtest <- function(cox.fit, i.betas){ q <- length(i.betas) coefs <- cox.fit$coefficients var <- cox.fit$var[i.betas, i.betas] Wald <- coefs[i.betas]%*%solve(var)%*%coefs[i.betas] p.value <- 1-pchisq(as.numeric(Wald), df = q) - print(p.value) return(p.value) } -``` -```{r} -vars <- c("hla.match", "age.donor", "age.rec", "cold.isc", "year", "sex", - "tx.type") -included <- vars == "tx.type" -included <- c(FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, TRUE) -included <- c(FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, TRUE) -find.best.surv <- function(included) { - print('---') +find.model.vars <- function(included) { test.vars <- vars[!included] p.values <- sapply(test.vars, function(test.var) { new.surv <- reformulate( @@ -464,31 +732,24 @@ find.best.surv <- function(included) { response = "Surv(follow.up, death)" ) fit.tx <- coxph(new.surv, data = dat) - Waldtest(fit.tx, sum(included)) + Waldtest(fit.tx, 1) }) - if (sum(p.values > 0.05) == sum(included)) { - print(3) - return(included) - } - if (sum(included == TRUE) == length(included)) { - print(2) + if (all(p.values > 0.05)) { return(included) } - print(1) - print(which.min(p.values)) - print(included) - print(p.values) - print(which.min(p.values)) - included[which.min(p.values)] <- TRUE - print(included) - # find.best.surv(included) + new.var <- names(p.values)[which.min(p.values)] + included[which(vars == new.var)] <- TRUE + return(find.model.vars(included)) } - -find.best.surv(included) +included <- find.model.vars(included) +vars[included] ``` +
+ + ```{r} # surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match # fit.cox.model(surv, 3) diff --git a/slides.html b/slides.html index 260d904..1cc2c98 100644 --- a/slides.html +++ b/slides.html @@ -127,37 +127,90 @@ window.Quarto = { -
-

1 Introduction: research question

-
-

1.1 Survival after transplantation (M)

+
+
+
+ +
+
+Warning +
+
+
+

There are 25 repondents with follow.up = 0, 21 of which with death = 0 and other 4 with death = 1.

+
+
dat[dat$follow.up == 0, ] |>
+  group_by(death) |>
+  summarise(n = n())
+
+
# A tibble: 2 × 2
+  death     n
+  <int> <int>
+1     0    21
+2     1     4
+
+
+

Reasons to omit them, that is, have dat <- dat[dat$follow.up > 0,] at the very beginning:

+
    +
  • If death = 1, transplant failed. They shouldn’t be included in the research question “survival rate of a successful tranplant”.
  • +
  • If death = 0, transplant was successful, but they were immediately removed from populatoin at risk, contributiong nothing to the analysis.
  • +
+
+
+
+

1 Title

-
-

1.2 Identify Predictors (M)

+
+

2 Table of Contents (?)

-
-

1.3 Why Survival Analysis (L)

-

Motivation: study the distribution of time to event \(T\).

-

Example: time of death after kidney transplant.

+
+

3 Introduction (M)

+
+
+

4 Method & Result

+
+

4.1 Identify Predictors (M)

+
    +
  • Data source
  • +
  • Study population
  • +
+
+
+
[1] 9775
+
+
+
    +
  • Covariates
  • +
  • Outcome
  • +
+
+
+
[1] "hla.match" "age.donor" "age.rec"   "cold.isc"  "death"     "year"     
+[7] "sex"       "tx.type"   "follow.up"
+
+
+
+
+

4.2 Table (L)

-
- + + ++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
CharacteristicOverall
+N = 9,775
Cadaveric
+N = 5,148
Living
+N = 4,627
Sex, n(%)
+

+

+
    Female4,014 (41%)2,105 (41%)1,909 (41%)
    Male5,761 (59%)3,043 (59%)2,718 (59%)
Recipient age, median (IQR)13 (8, 16)13 (7, 16)14 (9, 16)
    Unknown972
Donor age, median (IQR)33 (21, 41)37 (31, 42)22 (15, 37)
    Unknown1131121
HLA matches, n(%)
+

+

+
    0784 (8.2%)125 (2.5%)659 (14%)
    11,504 (16%)160 (3.2%)1,344 (29%)
    21,553 (16%)228 (4.6%)1,325 (29%)
    33,778 (40%)3,039 (61%)739 (16%)
    41,278 (13%)1,007 (20%)271 (5.9%)
    5365 (3.8%)241 (4.9%)124 (2.7%)
    6279 (2.9%)153 (3.1%)126 (2.7%)
    Unknown23419539
Cold ischemic time (hours), median (IQR),7 (1, 19)1 (0, 1)19 (13, 25)
    Unknown2,2501,490760
+ +
+
+
+
+
+
+

5 Elementary Analysis: Transplant Type (L)

+
+

5.1 Transplant Type vs Sex

+

sex are similar across tx.type

+
+
+
+
+

+
+
+
+
+
+
+

5.2 Transplant Type vs Recipient Age

+
+
+
+
+

+
+
+
+
+
+
+
+

6 Elementary Analysis: Transplant Type (Cont.)

+
+

6.1 Transplant Type vs HLA Match

+

Living donors have less hla.match then cadaveric donors.

+
+
+
+
+

+
+
+
+
+
+
+

6.2 Transplant Type vs Donor Age

+
+
+
+
+

+
+
+
+
+
+
+
+

7 Overall Kaplan-Meier Curve

+
+
+
+
+

+
+
+
+
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Overall Kaplan-Meier Survival Summary
Follow-up TimeNumber at RiskSurvival Probability
097751.000
264510.968
441500.953
623980.937
811970.919
104920.901
12480.888
+ +
+
+
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Overall Kaplan-Meier Survival Summary
Follow-up TimeNumber at RiskSurvival Probability
097751.000
264510.968
441500.953
623980.937
811970.919
104920.901
12480.888
+ +
+
+
+
+
+

8 Cox Model (with intervals?)

+
+
+

9 Cox Model: Transplant Type

+
+
+
Call:
+coxph(formula = Surv(follow.up, death) ~ tx.type, data = dat)
+
+  n= 9775, number of events= 465 
+
+                 coef exp(coef) se(coef)     z Pr(>|z|)    
+tx.typeLiving 0.64469   1.90539  0.09558 6.745 1.53e-11 ***
+---
+Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+
+              exp(coef) exp(-coef) lower .95 upper .95
+tx.typeLiving     1.905     0.5248      1.58     2.298
+
+Concordance= 0.586  (se = 0.012 )
+Likelihood ratio test= 47.1  on 1 df,   p=7e-12
+Wald test            = 45.5  on 1 df,   p=2e-11
+Score (logrank) test = 47.09  on 1 df,   p=7e-12
+
+
+
+
+
+
+

+
+
+
+
+
+
+
Call: survfit(formula = Surv(follow.up, death) ~ tx.type, data = dat)
+
+                tx.type=Cadaveric 
+ time n.risk n.event survival std.err lower 95% CI upper 95% CI
+    0   5148       0    1.000 0.00000        1.000        1.000
+    4   2297     130    0.967 0.00294        0.961        0.973
+    8    711      34    0.943 0.00514        0.933        0.953
+   12     27      13    0.908 0.01135        0.886        0.931
+
+                tx.type=Living 
+ time n.risk n.event survival  std.err lower 95% CI upper 95% CI
+    0   4627       4    0.999 0.000432        0.998        1.000
+    4   1853     225    0.937 0.004212        0.928        0.945
+    8    486      52    0.891 0.007720        0.876        0.906
+   12     21       7    0.864 0.013257        0.838        0.890
+
+
+
+
+

10 Cox Model: Age Recipient (Continuous)

+
+
+
Call:
+coxph(formula = Surv(follow.up, death) ~ age.rec, data = dat)
+
+  n= 9766, number of events= 464 
+   (9 observations deleted due to missingness)
+
+             coef exp(coef)  se(coef)      z Pr(>|z|)    
+age.rec -0.042550  0.958342  0.008434 -5.045 4.53e-07 ***
+---
+Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+
+        exp(coef) exp(-coef) lower .95 upper .95
+age.rec    0.9583      1.043    0.9426    0.9743
+
+Concordance= 0.586  (se = 0.016 )
+Likelihood ratio test= 24.81  on 1 df,   p=6e-07
+Wald test            = 25.45  on 1 df,   p=5e-07
+Score (logrank) test = 25.75  on 1 df,   p=4e-07
+
+
+
+
+
+
+

+
+
+
+
+
+ +View life table + +
+
+
Call: survfit(formula = Surv(follow.up, death) ~ age.rec, data = dat)
+
+9 observations deleted due to missingness 
+                age.rec=0 
+ time n.risk n.event survival std.err lower 95% CI upper 95% CI
+    0     93       1    0.989  0.0107        0.969         1.00
+    4     34      17    0.784  0.0460        0.699         0.88
+    8     13       0    0.784  0.0460        0.699         0.88
+
+                age.rec=1 
+ time n.risk n.event survival std.err lower 95% CI upper 95% CI
+    0    394       0    1.000  0.0000        1.000        1.000
+    4    158      32    0.900  0.0173        0.866        0.934
+    8     60       3    0.876  0.0217        0.835        0.920
+   12      1       0    0.876  0.0217        0.835        0.920
+
+                age.rec=2 
+ time n.risk n.event survival std.err lower 95% CI upper 95% CI
+    0    381       0    1.000  0.0000        1.000        1.000
+    4    161      26    0.917  0.0160        0.886        0.949
+    8     73       2    0.899  0.0204        0.859        0.939
+   12      1       2    0.860  0.0336        0.797        0.929
+
+                age.rec=3 
+ time n.risk n.event survival std.err lower 95% CI upper 95% CI
+    0    277       0    1.000  0.0000        1.000        1.000
+    4    130      15    0.938  0.0157        0.907        0.969
+    8     44       2    0.915  0.0223        0.872        0.960
+
+                age.rec=4 
+ time n.risk n.event survival std.err lower 95% CI upper 95% CI
+    0    279       0    1.000  0.0000        1.000        1.000
+    4    129      14    0.942  0.0151        0.913        0.972
+    8     44       5    0.895  0.0257        0.846        0.947
+   12      1       0    0.895  0.0257        0.846        0.947
+
+                age.rec=5 
+ time n.risk n.event survival std.err lower 95% CI upper 95% CI
+    0    286       0    1.000  0.0000        1.000        1.000
+    4    133      10    0.955  0.0141        0.928        0.983
+    8     49       3    0.920  0.0251        0.872        0.971
+   12      4       1    0.898  0.0330        0.835        0.965
+
+                age.rec=6 
+ time n.risk n.event survival std.err lower 95% CI upper 95% CI
+    0    302       0    1.000  0.0000        1.000        1.000
+    4    140      10    0.961  0.0123        0.937        0.985
+    8     53       0    0.961  0.0123        0.937        0.985
+   12      2       0    0.961  0.0123        0.937        0.985
+
+                age.rec=7 
+ time n.risk n.event survival std.err lower 95% CI upper 95% CI
+    0    333       0    1.000  0.0000        1.000        1.000
+    4    162      14    0.948  0.0140        0.921        0.976
+    8     62       2    0.932  0.0177        0.898        0.967
+   12      3       2    0.859  0.0546        0.759        0.973
+
+                age.rec=8 
+ time n.risk n.event survival std.err lower 95% CI upper 95% CI
+    0    371       0    1.000  0.0000        1.000        1.000
+    4    183      12    0.964  0.0102        0.944        0.984
+    8     72       3    0.935  0.0199        0.897        0.975
+   12      1       3    0.869  0.0416        0.791        0.954
+
+                age.rec=9 
+ time n.risk n.event survival std.err lower 95% CI upper 95% CI
+    0    388       0    1.000  0.0000        1.000        1.000
+    4    171      12    0.953  0.0137        0.927        0.980
+    8     63       7    0.891  0.0268        0.840        0.945
+   12      5       0    0.891  0.0268        0.840        0.945
+
+                age.rec=10 
+ time n.risk n.event survival std.err lower 95% CI upper 95% CI
+    0    430       1    0.998 0.00232        0.993        1.000
+    4    203       7    0.976 0.00876        0.959        0.993
+    8     69       4    0.943 0.01874        0.907        0.981
+   12      3       2    0.846 0.06716        0.725        0.989
+
+                age.rec=11 
+ time n.risk n.event survival std.err lower 95% CI upper 95% CI
+    0    443       0    1.000  0.0000        1.000        1.000
+    4    198       8    0.976  0.0087        0.959        0.993
+    8     63       6    0.921  0.0242        0.875        0.970
+   12      4       1    0.881  0.0455        0.796        0.975
+
+                age.rec=12 
+ time n.risk n.event survival std.err lower 95% CI upper 95% CI
+    0    541       1    0.998 0.00185        0.995        1.000
+    4    246      15    0.963 0.00943        0.944        0.981
+    8     69       5    0.933 0.01684        0.900        0.966
+   12      2       1    0.904 0.03300        0.841        0.971
+
+                age.rec=13 
+ time n.risk n.event survival std.err lower 95% CI upper 95% CI
+    0    644       0    1.000 0.00000        1.000        1.000
+    4    282      22    0.954 0.00981        0.935        0.974
+    8     57       4    0.922 0.02042        0.883        0.963
+   12      4       2    0.872 0.04013        0.796        0.954
+
+                age.rec=14 
+ time n.risk n.event survival std.err lower 95% CI upper 95% CI
+    0    685       0    1.000 0.00000        1.000        1.000
+    4    303      15    0.971 0.00753        0.957        0.986
+    8     59       4    0.945 0.01537        0.916        0.976
+   12      2       1    0.909 0.03860        0.837        0.988
+
+                age.rec=15 
+ time n.risk n.event survival std.err lower 95% CI upper 95% CI
+    0    874       1    0.999 0.00114        0.997        1.000
+    4    336      36    0.945 0.00910        0.927        0.963
+    8     66       6    0.918 0.01432        0.890        0.946
+   12      3       0    0.918 0.01432        0.890        0.946
+
+                age.rec=16 
+ time n.risk n.event survival std.err lower 95% CI upper 95% CI
+    0    935       0    1.000  0.0000        1.000        1.000
+    4    375      29    0.956  0.0082        0.941        0.973
+    8     86       8    0.924  0.0145        0.896        0.953
+   12      4       0    0.924  0.0145        0.896        0.953
+
+                age.rec=17 
+ time n.risk n.event survival std.err lower 95% CI upper 95% CI
+    0   1073       0    1.000 0.00000        1.000        1.000
+    4    410      28    0.961 0.00753        0.946        0.976
+    8     90      16    0.889 0.02020        0.850        0.929
+   12      1       2    0.854 0.03175        0.794        0.918
+
+                age.rec=18 
+ time n.risk n.event survival std.err lower 95% CI upper 95% CI
+    0   1037       0    1.000 0.00000        1.000        1.000
+    4    395      32    0.955 0.00802        0.940        0.971
+    8    104       6    0.931 0.01309        0.906        0.957
+   12      7       2    0.905 0.02210        0.863        0.949
+
+
+
+
+
+

11 Cox Model: Age Recipient (Categorical)

+
+
cox.age.rec.group
+
+
Call:
+coxph(formula = Surv(follow.up, death) ~ age.rec.group, data = dat.age.rec.group)
+
+                      coef exp(coef) se(coef)      z        p
+age.rec.group2-5   -0.5947    0.5517   0.1767 -3.366 0.000764
+age.rec.group6-10  -1.0578    0.3472   0.1776 -5.956 2.59e-09
+age.rec.group11-18 -1.0192    0.3609   0.1513 -6.736 1.63e-11
+
+Likelihood ratio test=44.93  on 3 df, p=9.559e-10
+n= 9766, number of events= 464 
+   (9 observations deleted due to missingness)
+
+
+ +
+
eme.age.rec.group <- emmeans(cox.age.rec.group, pairwise ~ age.rec.group,
+                             type = "response")
+eme.age.rec.group
+
+
$emmeans
+ age.rec.group response     SE  df asymp.LCL asymp.UCL
+ 0-1              1.000 0.0000 Inf     1.000     1.000
+ 2-5              0.552 0.0975 Inf     0.390     0.780
+ 6-10             0.347 0.0617 Inf     0.245     0.492
+ 11-18            0.361 0.0546 Inf     0.268     0.485
+
+Confidence level used: 0.95 
+Intervals are back-transformed from the log scale 
+
+$contrasts
+ contrast         ratio    SE  df null z.ratio p.value
+ (0-1) / (2-5)    1.813 0.320 Inf    1   3.366  0.0043
+ (0-1) / (6-10)   2.880 0.511 Inf    1   5.956  <.0001
+ (0-1) / (11-18)  2.771 0.419 Inf    1   6.736  <.0001
+ (2-5) / (6-10)   1.589 0.251 Inf    1   2.928  0.0179
+ (2-5) / (11-18)  1.529 0.196 Inf    1   3.314  0.0051
+ (6-10) / (11-18) 0.962 0.124 Inf    1  -0.298  0.9908
+
+P value adjustment: tukey method for comparing a family of 4 estimates 
+Tests are performed on the log scale 
+
+
+
+
+
+
+

+
+
+
+
+
+
+
+
+

+
+
+
+
+
+
+

12 Identifying Predictors

+
+

12.1 Start with nothing

+
+
old.vars <- c()
+test.new.var(old.vars, "tx.type", 1)
+
+
Call:
+coxph(formula = new.surv, data = dat, method = "breslow")
+
+                 coef exp(coef) se(coef)     z        p
+tx.typeLiving 0.64466   1.90534  0.09558 6.745 1.53e-11
+
+Likelihood ratio test=47.1  on 1 df, p=6.742e-12
+n= 9775, number of events= 465 
+
+
+
      Wald df            p
+1 45.49594  1 1.529554e-11
+
+
test.new.var(old.vars, "hla.match", 1)
+
+
Call:
+coxph(formula = new.surv, data = dat, method = "breslow")
+
+             coef exp(coef) se(coef)      z        p
+hla.match -0.2096    0.8109   0.0351 -5.972 2.34e-09
+
+Likelihood ratio test=35.87  on 1 df, p=2.112e-09
+n= 9541, number of events= 451 
+   (234 observations deleted due to missingness)
+
+
+
     Wald df            p
+1 35.6703  1 2.336993e-09
+
+
test.new.var(old.vars, "age.donor", 1)
+
+
Call:
+coxph(formula = new.surv, data = dat, method = "breslow")
+
+               coef exp(coef)  se(coef)      z        p
+age.donor -0.021738  0.978497  0.003467 -6.269 3.62e-10
+
+Likelihood ratio test=38.96  on 1 df, p=4.319e-10
+n= 9662, number of events= 464 
+   (113 observations deleted due to missingness)
+
+
+
      Wald df           p
+1 39.30567  1 3.62387e-10
+
+
test.new.var(old.vars, "age.rec", 1)
+
+
Call:
+coxph(formula = new.surv, data = dat, method = "breslow")
+
+             coef exp(coef)  se(coef)      z        p
+age.rec -0.042545  0.958348  0.008434 -5.045 4.54e-07
+
+Likelihood ratio test=24.8  on 1 df, p=6.362e-07
+n= 9766, number of events= 464 
+   (9 observations deleted due to missingness)
+
+
+
      Wald df            p
+1 25.44856  1 4.543371e-07
+
+
test.new.var(old.vars, "cold.isc", 1)
+
+
Call:
+coxph(formula = new.surv, data = dat, method = "breslow")
+
+             coef exp(coef) se(coef)     z        p
+cold.isc 0.019094  1.019278 0.004052 4.712 2.45e-06
+
+Likelihood ratio test=20.88  on 1 df, p=4.888e-06
+n= 7525, number of events= 357 
+   (2250 observations deleted due to missingness)
+
+
+
      Wald df            p
+1 22.20574  1 2.449402e-06
+
+
test.new.var(old.vars, "sex", 1)
+
+
Call:
+coxph(formula = new.surv, data = dat, method = "breslow")
+
+            coef exp(coef) se(coef)      z     p
+sexMale -0.08422   0.91923  0.09374 -0.898 0.369
+
+Likelihood ratio test=0.8  on 1 df, p=0.37
+n= 9775, number of events= 465 
+
+
+
       Wald df         p
+1 0.8072063  1 0.3689475
+
+
test.new.var(old.vars, "year", 1)
+
+
Call:
+coxph(formula = new.surv, data = dat, method = "breslow")
+
+         coef exp(coef) se(coef)      z     p
+year -0.03667   0.96399  0.01550 -2.366 0.018
+
+Likelihood ratio test=5.62  on 1 df, p=0.0178
+n= 9775, number of events= 465 
+
+
+
      Wald df          p
+1 5.595603  1 0.01800561
+
+
+
+
+

12.2 tx.type + ?

+
+
old.vars <- c("tx.type")
+
+# test.new.var(old.vars, "hla.match", 2:7)
+test.new.var(old.vars, "hla.match", 2)
+
+
Call:
+coxph(formula = new.surv, data = dat, method = "breslow")
+
+                  coef exp(coef) se(coef)      z        p
+tx.typeLiving  0.45336   1.57359  0.11146  4.068 4.75e-05
+hla.match     -0.12459   0.88286  0.03959 -3.147  0.00165
+
+Likelihood ratio test=52.43  on 2 df, p=4.122e-12
+n= 9541, number of events= 451 
+   (234 observations deleted due to missingness)
+
+
+
      Wald df           p
+1 9.905243  1 0.001648086
+
+
test.new.var(old.vars, "age.donor", 2)
+
+
Call:
+coxph(formula = new.surv, data = dat, method = "breslow")
+
+                   coef exp(coef)  se(coef)      z        p
+tx.typeLiving  0.478191  1.613154  0.106660  4.483 7.35e-06
+age.donor     -0.013523  0.986568  0.003745 -3.611 0.000305
+
+Likelihood ratio test=59.05  on 2 df, p=1.506e-13
+n= 9662, number of events= 464 
+   (113 observations deleted due to missingness)
+
+
+
     Wald df            p
+1 13.0412  1 0.0003047131
+
+
test.new.var(old.vars, "age.rec", 2)
+
+
Call:
+coxph(formula = new.surv, data = dat, method = "breslow")
+
+                   coef exp(coef)  se(coef)      z        p
+tx.typeLiving  0.693775  2.001255  0.096157  7.215 5.39e-13
+age.rec       -0.049081  0.952104  0.008536 -5.750 8.92e-09
+
+Likelihood ratio test=78.72  on 2 df, p=< 2.2e-16
+n= 9766, number of events= 464 
+   (9 observations deleted due to missingness)
+
+
+
      Wald df            p
+1 33.06396  1 8.917646e-09
+
+
test.new.var(old.vars, "cold.isc", 2)
+
+
Call:
+coxph(formula = new.surv, data = dat, method = "breslow")
+
+                  coef exp(coef) se(coef)     z       p
+tx.typeLiving 0.477550  1.612119 0.171044 2.792 0.00524
+cold.isc      0.004924  1.004937 0.006728 0.732 0.46420
+
+Likelihood ratio test=28.72  on 2 df, p=5.811e-07
+n= 7525, number of events= 357 
+   (2250 observations deleted due to missingness)
+
+
+
       Wald df         p
+1 0.5357515  1 0.4641988
+
+
test.new.var(old.vars, "sex", 2)
+
+
Call:
+coxph(formula = new.surv, data = dat, method = "breslow")
+
+                  coef exp(coef) se(coef)      z       p
+tx.typeLiving  0.64499   1.90597  0.09558  6.748 1.5e-11
+sexMale       -0.08657   0.91707  0.09374 -0.924   0.356
+
+Likelihood ratio test=47.95  on 2 df, p=3.87e-11
+n= 9775, number of events= 465 
+
+
+
       Wald df         p
+1 0.8528929  1 0.3557352
+
+
test.new.var(old.vars, "year", 2)
+
+
Call:
+coxph(formula = new.surv, data = dat, method = "breslow")
+
+                  coef exp(coef) se(coef)      z        p
+tx.typeLiving  0.63806   1.89281  0.09563  6.672 2.52e-11
+year          -0.03310   0.96744  0.01548 -2.138   0.0325
+
+Likelihood ratio test=51.69  on 2 df, p=5.967e-12
+n= 9775, number of events= 465 
+
+
+
      Wald df         p
+1 4.573044  1 0.0324788
+
+
+

age.rec has the smallest \(p\)-value

+
+
+

12.3 tx.type + age.rec + ?

+
+
old.vars <- c("tx.type", "age.rec")
+
+test.new.var(old.vars, "hla.match", 3)
+
+
Call:
+coxph(formula = new.surv, data = dat, method = "breslow")
+
+                   coef exp(coef)  se(coef)      z        p
+tx.typeLiving  0.512664  1.669733  0.112875  4.542 5.58e-06
+age.rec       -0.048268  0.952878  0.008688 -5.556 2.77e-08
+hla.match     -0.113954  0.892299  0.039929 -2.854  0.00432
+
+Likelihood ratio test=81.92  on 3 df, p=< 2.2e-16
+n= 9535, number of events= 450 
+   (240 observations deleted due to missingness)
+
+
+
      Wald df           p
+1 8.144605  1 0.004318947
+
+
test.new.var(old.vars, "age.donor", 3)
+
+
Call:
+coxph(formula = new.surv, data = dat, method = "breslow")
+
+                   coef exp(coef)  se(coef)      z        p
+tx.typeLiving  0.571325  1.770611  0.107377  5.321 1.03e-07
+age.rec       -0.044085  0.956872  0.008718 -5.057 4.26e-07
+age.donor     -0.009688  0.990359  0.003797 -2.551   0.0107
+
+Likelihood ratio test=83.3  on 3 df, p=< 2.2e-16
+n= 9655, number of events= 463 
+   (120 observations deleted due to missingness)
+
+
+
      Wald df          p
+1 6.508773  1 0.01073435
+
+
test.new.var(old.vars, "cold.isc", 3)
+
+
Call:
+coxph(formula = new.surv, data = dat, method = "breslow")
+
+                   coef exp(coef)  se(coef)      z       p
+tx.typeLiving  0.489852  1.632075  0.170731  2.869 0.00412
+age.rec       -0.025545  0.974779  0.010019 -2.550 0.01078
+cold.isc       0.005637  1.005653  0.006709  0.840 0.40085
+
+Likelihood ratio test=35.04  on 3 df, p=1.194e-07
+n= 7519, number of events= 357 
+   (2256 observations deleted due to missingness)
+
+
+
      Wald df         p
+1 0.705761  1 0.4008547
+
+
test.new.var(old.vars, "sex", 3)
+
+
Call:
+coxph(formula = new.surv, data = dat, method = "breslow")
+
+                   coef exp(coef)  se(coef)      z        p
+tx.typeLiving  0.695122  2.003953  0.096164  7.229 4.88e-13
+age.rec       -0.050004  0.951226  0.008569 -5.835 5.37e-09
+sexMale       -0.130416  0.877730  0.094113 -1.386    0.166
+
+Likelihood ratio test=80.63  on 3 df, p=< 2.2e-16
+n= 9766, number of events= 464 
+   (9 observations deleted due to missingness)
+
+
+
      Wald df         p
+1 1.920263  1 0.1658277
+
+
test.new.var(old.vars, "year", 3)
+
+
Call:
+coxph(formula = new.surv, data = dat, method = "breslow")
+
+                   coef exp(coef)  se(coef)      z        p
+tx.typeLiving  0.687661  1.989059  0.096215  7.147 8.86e-13
+age.rec       -0.049029  0.952153  0.008537 -5.743 9.30e-09
+year          -0.033478  0.967076  0.015475 -2.163   0.0305
+
+Likelihood ratio test=83.42  on 3 df, p=< 2.2e-16
+n= 9766, number of events= 464 
+   (9 observations deleted due to missingness)
+
+
+
      Wald df          p
+1 4.680122  1 0.03051359
+
+
+

hla.match has the smallest \(p\)-value

+
+
+

12.4 tx.type + age.rec + hla.match + ?

+
+
old.vars <- c("tx.type", "age.rec", "hla.match")
+
+test.new.var(old.vars, "age.donor", 4)
+
+
Call:
+coxph(formula = new.surv, data = dat, method = "breslow")
+
+                   coef exp(coef)  se(coef)      z        p
+tx.typeLiving  0.408204  1.504113  0.121727  3.353 0.000798
+age.rec       -0.043716  0.957225  0.008875 -4.926 8.39e-07
+hla.match     -0.107306  0.898251  0.040127 -2.674 0.007491
+age.donor     -0.008810  0.991229  0.003859 -2.283 0.022443
+
+Likelihood ratio test=85.46  on 4 df, p=< 2.2e-16
+n= 9433, number of events= 449 
+   (342 observations deleted due to missingness)
+
+
+
      Wald df          p
+1 5.211086  1 0.02244331
+
+
test.new.var(old.vars, "cold.isc", 4)
+
+
Call:
+coxph(formula = new.surv, data = dat, method = "breslow")
+
+                   coef exp(coef)  se(coef)      z      p
+tx.typeLiving  0.349594  1.418492  0.182948  1.911 0.0560
+age.rec       -0.023299  0.976970  0.010119 -2.302 0.0213
+hla.match     -0.071073  0.931394  0.044036 -1.614 0.1065
+cold.isc       0.006428  1.006449  0.006717  0.957 0.3386
+
+Likelihood ratio test=33.68  on 4 df, p=8.682e-07
+n= 7381, number of events= 352 
+   (2394 observations deleted due to missingness)
+
+
+
       Wald df         p
+1 0.9158816  1 0.3385587
+
+
test.new.var(old.vars, "sex", 4)
+
+
Call:
+coxph(formula = new.surv, data = dat, method = "breslow")
+
+                   coef exp(coef)  se(coef)      z        p
+tx.typeLiving  0.515100  1.673806  0.112902  4.562 5.06e-06
+age.rec       -0.048944  0.952234  0.008721 -5.612 2.00e-08
+hla.match     -0.112940  0.893204  0.039936 -2.828  0.00468
+sexMale       -0.094882  0.909480  0.095783 -0.991  0.32188
+
+Likelihood ratio test=82.9  on 4 df, p=< 2.2e-16
+n= 9535, number of events= 450 
+   (240 observations deleted due to missingness)
+
+
+
       Wald df         p
+1 0.9812753  1 0.3218842
+
+
test.new.var(old.vars, "year", 4)
+
+
Call:
+coxph(formula = new.surv, data = dat, method = "breslow")
+
+                   coef exp(coef)  se(coef)      z        p
+tx.typeLiving  0.494911  1.640353  0.113403  4.364 1.28e-05
+age.rec       -0.048129  0.953011  0.008689 -5.539 3.04e-08
+hla.match     -0.122559  0.884654  0.040341 -3.038  0.00238
+year          -0.035190  0.965422  0.015831 -2.223  0.02622
+
+Likelihood ratio test=86.88  on 4 df, p=< 2.2e-16
+n= 9535, number of events= 450 
+   (240 observations deleted due to missingness)
+
+
+
      Wald df          p
+1 4.941346  1 0.02622161
+
+
+

age.donor has the smallest \(p\)-value

+
+
+

12.5 tx.type + age.rec + hla.match + age.donor + ?

+
+
old.vars <- c("tx.type", "age.rec", "hla.match", "age.donor")
+
+test.new.var(old.vars, "cold.isc", 5)
+
+
Call:
+coxph(formula = new.surv, data = dat, method = "breslow")
+
+                   coef exp(coef)  se(coef)      z      p
+tx.typeLiving  0.355582  1.427011  0.188970  1.882 0.0599
+age.rec       -0.021491  0.978738  0.010298 -2.087 0.0369
+hla.match     -0.067709  0.934532  0.044223 -1.531 0.1257
+age.donor     -0.003508  0.996499  0.004230 -0.829 0.4069
+cold.isc       0.004519  1.004530  0.006774  0.667 0.5047
+
+Likelihood ratio test=34.04  on 5 df, p=2.333e-06
+n= 7347, number of events= 351 
+   (2428 observations deleted due to missingness)
+
+
+
      Wald df         p
+1 0.445129  1 0.5046572
+
+
test.new.var(old.vars, "sex", 5)
+
+
Call:
+coxph(formula = new.surv, data = dat, method = "breslow")
+
+                   coef exp(coef)  se(coef)      z        p
+tx.typeLiving  0.411943  1.509748  0.121783  3.383 0.000718
+age.rec       -0.044462  0.956512  0.008914 -4.988  6.1e-07
+hla.match     -0.106379  0.899084  0.040136 -2.650 0.008038
+age.donor     -0.008675  0.991362  0.003862 -2.246 0.024677
+sexMale       -0.092710  0.911458  0.095941 -0.966 0.333885
+
+Likelihood ratio test=86.39  on 5 df, p=< 2.2e-16
+n= 9433, number of events= 449 
+   (342 observations deleted due to missingness)
+
+
+
       Wald df         p
+1 0.9337712  1 0.3338849
+
+
test.new.var(old.vars, "year", 5)
+
+
Call:
+coxph(formula = new.surv, data = dat, method = "breslow")
+
+                   coef exp(coef)  se(coef)      z        p
+tx.typeLiving  0.397903  1.488700  0.121903  3.264  0.00110
+age.rec       -0.043831  0.957116  0.008875 -4.939 7.87e-07
+hla.match     -0.115648  0.890789  0.040537 -2.853  0.00433
+age.donor     -0.008325  0.991709  0.003854 -2.160  0.03078
+year          -0.033109  0.967433  0.015844 -2.090  0.03664
+
+Likelihood ratio test=89.84  on 5 df, p=< 2.2e-16
+n= 9433, number of events= 449 
+   (342 observations deleted due to missingness)
+
+
+
      Wald df         p
+1 4.367128  1 0.0366387
+
+
+

year has the smallest \(p\)-value

+
+
+

12.6 tx.type + age.rec + hla.match + age.donor + year

+
+
old.vars <- c("tx.type", "age.rec", "hla.match", "age.donor", "year")
+
+test.new.var(old.vars, "cold.isc", 6)
+
+
Call:
+coxph(formula = new.surv, data = dat, method = "breslow")
+
+                   coef exp(coef)  se(coef)      z      p
+tx.typeLiving  0.381855  1.464999  0.189901  2.011 0.0443
+age.rec       -0.021369  0.978857  0.010295 -2.076 0.0379
+hla.match     -0.073960  0.928708  0.044568 -1.660 0.0970
+age.donor     -0.002981  0.997024  0.004226 -0.705 0.4806
+year          -0.037375  0.963315  0.019227 -1.944 0.0519
+cold.isc       0.002743  1.002747  0.006825  0.402 0.6878
+
+Likelihood ratio test=37.86  on 6 df, p=1.195e-06
+n= 7347, number of events= 351 
+   (2428 observations deleted due to missingness)
+
+
+
       Wald df         p
+1 0.1615171  1 0.6877636
+
+
test.new.var(old.vars, "sex", 6)
+
+
Call:
+coxph(formula = new.surv, data = dat, method = "breslow")
+
+                   coef exp(coef)  se(coef)      z        p
+tx.typeLiving  0.401829  1.494556  0.121963  3.295 0.000985
+age.rec       -0.044602  0.956378  0.008916 -5.003 5.66e-07
+hla.match     -0.114750  0.891589  0.040546 -2.830 0.004653
+age.donor     -0.008185  0.991848  0.003857 -2.122 0.033833
+year          -0.033222  0.967324  0.015844 -2.097 0.036011
+sexMale       -0.094191  0.910109  0.095956 -0.982 0.326293
+
+Likelihood ratio test=90.8  on 6 df, p=< 2.2e-16
+n= 9433, number of events= 449 
+   (342 observations deleted due to missingness)
+
+
+
       Wald df         p
+1 0.9635531  1 0.3262933
+
+
+

Neither has \(p\)-value less than 0.05. Stop.

+
+
+
+

13 Full Model

+
+
cox.full
+
+
Call:
+coxph(formula = surv.full, data = dat, method = "breslow")
+
+                    coef  exp(coef)   se(coef)      z       p
+tx.typeLiving  0.0734859  1.0762534  0.0253724  2.896 0.00378
+age.rec        0.0154431  1.0155630  0.0020497  7.534 4.9e-14
+hla.match     -0.0210826  0.9791381  0.0086077 -2.449 0.01431
+age.donor     -0.0009874  0.9990131  0.0008841 -1.117 0.26405
+year           0.4429003  1.5572171  0.0057586 76.910 < 2e-16
+
+Likelihood ratio test=8440  on 5 df, p=< 2.2e-16
+n= 9433, number of events= 9433 
+   (342 observations deleted due to missingness)
+
+
+
+
+
+
+

+
+
+
+
+
+

13.1 What happened to year?

+
+
+
+
+

+
+
+
+
+
+
+
+
+

+
+
+
+
+

Probabily want to omit it

+
+
+

13.2 Without year

+
+
cox.full.1
+
+
Call:
+coxph(formula = surv.full.1, data = dat, method = "breslow")
+
+                    coef  exp(coef)   se(coef)      z        p
+tx.typeLiving  0.0037484  1.0037554  0.0254736  0.147   0.8830
+age.rec        0.0139177  1.0140150  0.0020498  6.790 1.12e-11
+hla.match     -0.0854685  0.9180821  0.0089546 -9.545  < 2e-16
+age.donor      0.0023071  1.0023098  0.0008657  2.665   0.0077
+
+Likelihood ratio test=172.9  on 4 df, p=< 2.2e-16
+n= 9433, number of events= 9433 
+   (342 observations deleted due to missingness)
+
+
+
+
+
+
+

+
+
+
+
+
+

TODO: write a more generic function (UPDATE: FAILED)

+
+
+
+
+
+
+

14 Old

+
+
+

14.1 Death & Censored

+
+
dat |>
+  group_by(death) |>
+  summarise(
+    n = n(),
+    percentage = n() / nrow(dat) * 100
+  )
+
+
# A tibble: 2 × 3
+  death     n percentage
+  <int> <int>      <dbl>
+1     0  9310      95.2 
+2     1   465       4.76
+
+
+

Most of the patients (95.24%) were censored.

+
+
+
+
+

+
+
+
+
+
+
+
+
+

+
+
+
+
+
+
+
   hla.match age.donor age.rec cold.isc death year    sex   tx.type follow.up
+1          2        25      15        6     0 2002 Female    Living         0
+2          1        42      10       11     1 1999 Female    Living         0
+3          1         9      14        8     0 2002   Male    Living         0
+4          0        35      12       26     0 2002   Male    Living         0
+5          2        17      12       28     1 1997   Male    Living         0
+6          3        44      13        1     0 2002   Male Cadaveric         0
+7          1        34      12        1     0 2002   Male Cadaveric         0
+8          4        38      11        2     0 2002 Female Cadaveric         0
+9          0        23      10        6     0 2002 Female    Living         0
+10         3        26       1        1     0 2002   Male Cadaveric         0
+11        NA        42      18       NA     0 2002   Male Cadaveric         0
+12         4        39       9       NA     0 2002 Female Cadaveric         0
+13         3        34      13       NA     0 2002   Male Cadaveric         0
+14         3        NA      14        2     0 2002 Female Cadaveric         0
+15         3        40      17        1     0 2002 Female Cadaveric         0
+16         3        22      14        2     0 2002 Female Cadaveric         0
+17         0        17      17       12     0 2002   Male    Living         0
+18         3        NA       7       NA     0 2002   Male Cadaveric         0
+19         0         4       0       NA     1 2001 Female    Living         0
+20         4        NA      NA        1     0 2002 Female Cadaveric         0
+21         3        42      18        1     0 2002   Male Cadaveric         0
+22         1        19      15       21     1 1994   Male    Living         0
+23         3        44      10       NA     0 2002   Male Cadaveric         0
+24         0        13      14       NA     0 2001   Male    Living         0
+25         1        NA       6        1     0 2002   Male Cadaveric         0
+
+
+
+
+
  hla.match age.donor age.rec cold.isc death year    sex   tx.type follow.up
+1         2        25      15        6     0 2002 Female    Living         0
+2         1        42      10       11     1 1999 Female    Living         0
+3         1         9      14        8     0 2002   Male    Living         0
+4         0        35      12       26     0 2002   Male    Living         0
+5         2        17      12       28     1 1997   Male    Living         0
+6         3        44      13        1     0 2002   Male Cadaveric         0
+
+
+
+
+

14.2 Cox Model - Age as a Continuous Variable

+
+
cox.age.rec <- coxph(Surv(follow.up, death) ~ age.rec, data = dat)
+cox.age.rec |> summary()
+
+
Call:
+coxph(formula = Surv(follow.up, death) ~ age.rec, data = dat)
+
+  n= 9766, number of events= 464 
+   (9 observations deleted due to missingness)
+
+             coef exp(coef)  se(coef)      z Pr(>|z|)    
+age.rec -0.042550  0.958342  0.008434 -5.045 4.53e-07 ***
+---
+Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+
+        exp(coef) exp(-coef) lower .95 upper .95
+age.rec    0.9583      1.043    0.9426    0.9743
+
+Concordance= 0.586  (se = 0.016 )
+Likelihood ratio test= 24.81  on 1 df,   p=6e-07
+Wald test            = 25.45  on 1 df,   p=5e-07
+Score (logrank) test = 25.75  on 1 df,   p=4e-07
+
+
+
+
+

14.3 Cox Model - Age as a Categorical Variable

+
+
+
          chisq df       p
+tx.type    2.46  1 0.11654
+age.rec   36.09  1 1.9e-09
+hla.match  8.63  1 0.00331
+age.donor 10.88  1 0.00097
+year       3.96  1 0.04666
+GLOBAL    52.00  5 5.4e-10
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

14.4 Table 1 (L)

+
+

14.4.1 HLA match hla.match

+
+
+

+   0    1    2    3    4    5    6 <NA> 
+ 784 1504 1553 3778 1278  365  279  234 
+
+
+
+
+
+
+

+
+
+
+
+
+
+
+
+

+
+
+
+
+
+
+
+ +
+
+Warning +
+
+
+

TODO: change to percentage within each tx.type

+
+
+
+
+
+ +
+
+Note +
+
+
+

Living donors have less hla.match then cadaveric donors

+
+
+
+
+

14.4.2 Donor age age.donor

+
+
mean(dat$age.donor, na.rm = TRUE)
+
+
[1] 31.29952
+
+
median(dat$age.donor, na.rm = TRUE)
+
+
[1] 33
+
+
+
+
+
+
+

+
+
+
+
+
+
+
+
+

+
+
+
+
+
+
+
+
+

+
+
+
+
+
+
+
+
+

+
+
+
+
+
+
+
+
+

+
+
+
+
+
+
+
+ +
+
+Note +
+
+
+

There is age difference between different tx.type

+
+
+
+
+

14.4.3 Recipient Age age.rec

+
+
mean(dat$age.rec, na.rm = TRUE)
+
+
[1] 11.64653
+
+
median(dat$age.rec, na.rm = TRUE)
+
+
[1] 13
+
+
+
+
+
+
+

+
+
+
+
+
+
+
+
+

+
+
+
+
+
+
+
+
+

+
+
+
+
+
+
+
+
+

+
+
+
+
+
+
+
+
+

+
+
+
+
+
+
+

14.4.4 Cold Ischemia Time cold.isc

+
+
mean(dat$cold.isc, na.rm = TRUE)
+
+
[1] 10.85967
+
+
median(dat$cold.isc, na.rm = TRUE)
+
+
[1] 7
+
+
+
+
+
+
+

+
+
+
+
+
+
+
+
+

+
+
+
+
+
+
+
+
+

+
+
+
+
+
+
+

14.4.5 Transplant Type tx.type

+
+
+

+Cadaveric    Living      <NA> 
+     5148      4627         0 
+
+
+
+
dat$tx.type |> table()
+
+

+Cadaveric    Living 
+     5148      4627 
+
+
+
+
+
+
+

+
+
+
+
+
+
+

14.4.6 Year year

+
+
+

+1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 
+ 728  734  666  736  787  813  789  745  740  842  706  834  655 
+
+
+
+
+
+
+

+
+
+
+
+
+
+
+
+

+
+
+
+
+
+
+
+
+

+
+
+
+
+
+
+
+
+

+
+
+
+
+
+
+
+
+

+
+
+
+
+
+
+
+

14.5 Kaplan-Meier

+
+

14.5.1 Overall (L)

+
+
+
Call: survfit(formula = Surv(follow.up, death) ~ 1, data = dat)
+
+ time n.risk n.event survival std.err lower 95% CI upper 95% CI
+    4   4150     359    0.953 0.00252        0.948        0.958
+    8   1197      86    0.919 0.00449        0.910        0.928
+   12     48      20    0.888 0.00867        0.871        0.905
+
+
+
+
+
+
+

+
+
+
+
+
+
+

14.5.2 tx.type (M)

+
+
+
+
+

+
+
+
+
+
+
+
+

14.6 Cox Model

+
+

14.6.1 death Distribution (?)

+
+
+
+
+

+
+
+
+
+
+
+

14.6.2 Overall (L)

+
+
+
+ +
+
+Note +
+
+
+

skip overall, jump directly to tx.type

+
+
+
+
+
# A tibble: 7 × 5
+  fu.interval n.censored n.event n.at.risk hazard.rate
+        <dbl>      <int>   <int>     <dbl>       <dbl>
+1       0.333        830     152      9775     0.0155 
+2       0.667        491      37      8793     0.00421
+3       1            613      27      8265     0.00327
+4       2           1164      62      7625     0.00813
+5       3           1143      41      6399     0.00641
+6       4           1053      40      5215     0.00767
+7       5            919      31      4122     0.00752
+
+
+
+
+

14.6.3 tx.type (M)

+
+
+

14.6.4 age (continuous) (L)

+
+
+

14.6.5 age (categorical) (M)

+
+
+

14.6.6 Full model (L)

+
+
+
Call:
+coxph(formula = Surv(follow.up, death) ~ hla.match + tx.type, 
+    data = dat)
+
+  n= 9541, number of events= 451 
+   (234 observations deleted due to missingness)
+
+                  coef exp(coef) se(coef)      z Pr(>|z|)    
+hla.match     -0.12460   0.88285  0.03959 -3.148  0.00165 ** 
+tx.typeLiving  0.45336   1.57359  0.11146  4.068 4.75e-05 ***
+---
+Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+
+              exp(coef) exp(-coef) lower .95 upper .95
+hla.match        0.8828     1.1327    0.8169    0.9541
+tx.typeLiving    1.5736     0.6355    1.2648    1.9578
+
+Concordance= 0.611  (se = 0.014 )
+Likelihood ratio test= 52.43  on 2 df,   p=4e-12
+Wald test            = 51.54  on 2 df,   p=6e-12
+Score (logrank) test = 53.06  on 2 df,   p=3e-12
+
+
+
Call:
+coxph(formula = Surv(follow.up, death) ~ hla.match * tx.type, 
+    data = dat)
+
+  n= 9541, number of events= 451 
+   (234 observations deleted due to missingness)
+
+                            coef exp(coef) se(coef)      z Pr(>|z|)  
+hla.match               -0.08577   0.91780  0.07828 -1.096   0.2732  
+tx.typeLiving            0.59989   1.82192  0.27919  2.149   0.0317 *
+hla.match:tx.typeLiving -0.05232   0.94902  0.09086 -0.576   0.5647  
+---
+Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+
+                        exp(coef) exp(-coef) lower .95 upper .95
+hla.match                  0.9178     1.0896    0.7873     1.070
+tx.typeLiving              1.8219     0.5489    1.0541     3.149
+hla.match:tx.typeLiving    0.9490     1.0537    0.7942     1.134
+
+Concordance= 0.611  (se = 0.014 )
+Likelihood ratio test= 52.77  on 3 df,   p=2e-11
+Wald test            = 52.64  on 3 df,   p=2e-11
+Score (logrank) test = 54.76  on 3 df,   p=8e-12
+
+
+
Analysis of Deviance Table
+ Cox model: response is  Surv(follow.up, death)
+ Model 1: ~ hla.match + tx.type
+ Model 2: ~ hla.match * tx.type
+   loglik  Chisq Df Pr(>|Chi|)
+1 -3850.6                     
+2 -3850.5 0.3327  1     0.5641
+
+
+
+
+

14.6.7 Discussion: include year or not?

+
+
+
+

14.7 Assumption Testing

+

## Why Survival Analysis (L)

+

Motivation: study the distribution of time to event \(T\).

+

Example: time of death after kidney transplant.

+
+
+
+ @@ -633,7 +3900,7 @@ window.Quarto = {
-

+

@@ -643,23 +3910,23 @@ window.Quarto = {

With this, all we need is to fit a linear model t ~ X or log(t) ~ X

-
- @@ -1141,7 +4408,7 @@ window.Quarto = {
-

+

@@ -1151,23 +4418,23 @@ window.Quarto = {

Scenario 1: the study ends at the year 2008?

-
- @@ -1649,14 +4916,14 @@ window.Quarto = {
-

+

-

+

@@ -1666,23 +4933,23 @@ window.Quarto = {

Scenario 2: respondent 3 move away; loss follow up

-
- @@ -2164,14 +5431,14 @@ window.Quarto = {
-

+

-

+

@@ -2182,14 +5449,14 @@ window.Quarto = {
-

+

-
-

1.4 Challenges in Survival Analysis (L)

+
+

14.8 Challenges in Survival Analysis (L)

  • Right Censoring: only observe the event if it occurs before a certain time.
  • Left Censoring: event has occurred prior to the start of a research @@ -2208,616 +5475,9 @@ window.Quarto = {
  • Patients not dead before 2000 are not included in the dataset.
+
-
-
-

2 Method & Result

-
-

2.1 Explain Dataset (M)

-
-
-
- -
-
-Things to highlight in figures -
-
-
-
    -
  • sex and recipient age are similar across tx.type

  • -
  • Living donors have less hla.match then cadaveric donors

  • -
  • There is age difference between different tx.type

  • -
-
-
-
-

2.1.1 sex ~ tx.tpye

-
-
-
- -
-
-Note -
-
-
-

sex are similar across tx.type

-
-
-
-
-
-
-

-
-
-
-
-
-
-
-

2.2 Table 1 (L)

-
-

2.2.1 HLA match hla.match

-
-
-

-   0    1    2    3    4    5    6 <NA> 
- 784 1504 1553 3778 1278  365  279  234 
-
-
-
-
-
-
-

-
-
-
-
-
-
-
-
-

-
-
-
-
-
-
-
- -
-
-Warning -
-
-
-

TODO: change to percentage within each tx.type

-
-
-
-
-
- -
-
-Note -
-
-
-

Living donors have less hla.match then cadaveric donors

-
-
-
-
-

2.2.2 Donor age age.donor

-
-
mean(dat$age.donor, na.rm = TRUE)
-
-
[1] 31.29952
-
-
median(dat$age.donor, na.rm = TRUE)
-
-
[1] 33
-
-
-
-
-
Warning: Removed 113 rows containing non-finite outside the scale range
-(`stat_count()`).
-
-
-
-
-

-
-
-
-
-
-
-
Warning: Removed 113 rows containing non-finite outside the scale range
-(`stat_count()`).
-
-
-
-
-

-
-
-
-
-
-
-
Warning: Removed 113 rows containing non-finite outside the scale range
-(`stat_density()`).
-
-
-
-
-

-
-
-
-
-
-
-
Warning: Removed 113 rows containing non-finite outside the scale range
-(`stat_density()`).
-
-
-
-
-

-
-
-
-
-
-
-
Warning: Removed 113 rows containing non-finite outside the scale range
-(`stat_boxplot()`).
-
-
-
-
-

-
-
-
-
-
-
-
- -
-
-Note -
-
-
-

There is age difference between different tx.type

-
-
-
-
-

2.2.3 Recipient Age age.rec

-
-
mean(dat$age.rec, na.rm = TRUE)
-
-
[1] 11.64653
-
-
median(dat$age.rec, na.rm = TRUE)
-
-
[1] 13
-
-
-
-
-
Warning: Removed 9 rows containing non-finite outside the scale range
-(`stat_count()`).
-
-
-
-
-

-
-
-
-
-
-
-
Warning: Removed 113 rows containing non-finite outside the scale range
-(`stat_boxplot()`).
-
-
-
-
-

-
-
-
-
-
-
-
Warning: Removed 9 rows containing non-finite outside the scale range
-(`stat_count()`).
-
-
-
-
-

-
-
-
-
-
-
-
Warning: Removed 9 rows containing non-finite outside the scale range
-(`stat_density()`).
-
-
-
-
-

-
-
-
-
-
-
-
Warning: Removed 2250 rows containing non-finite outside the scale range
-(`stat_boxplot()`).
-
-
-
-
-

-
-
-
-
-
-
-

2.2.4 Cold Ischemia Time cold.isc

-
-
mean(dat$cold.isc, na.rm = TRUE)
-
-
[1] 10.85967
-
-
median(dat$cold.isc, na.rm = TRUE)
-
-
[1] 7
-
-
-
-
-
Warning: Removed 2250 rows containing non-finite outside the scale range
-(`stat_density()`).
-
-
-
-
-

-
-
-
-
-
-
-
Warning: Removed 2250 rows containing non-finite outside the scale range
-(`stat_density()`).
-
-
-
-
-

-
-
-
-
-
-
-
Warning: Removed 2250 rows containing non-finite outside the scale range
-(`stat_density()`).
-
-
-
-
-

-
-
-
-
-
-
-

2.2.5 Transplant Type tx.type

-
-
-

-Cadaveric    Living      <NA> 
-     5148      4627         0 
-
-
-
-
dat$tx.type |> table()
-
-

-Cadaveric    Living 
-     5148      4627 
-
-
-
-
-
-
-

-
-
-
-
-
-
-

2.2.6 Year year

-
-
-

-1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 
- 728  734  666  736  787  813  789  745  740  842  706  834  655 
-
-
-
-
-
-
-

-
-
-
-
-
-
-
-
-

-
-
-
-
-
-
-
-
-

-
-
-
-
-
-
-
Warning: Removed 9 rows containing non-finite outside the scale range
-(`stat_boxplot()`).
-
-
-
-
-

-
-
-
-
-
-
-
Warning: Removed 113 rows containing non-finite outside the scale range
-(`stat_boxplot()`).
-
-
-
-
-

-
-
-
-
-
-
-
-

2.3 Kaplan-Meier

-
-

2.3.1 Overall (L)

-
-
-
Call: survfit(formula = Surv(follow.up, death) ~ 1, data = dat)
 
- time n.risk n.event survival std.err lower 95% CI upper 95% CI
-    4   4150     359    0.953 0.00252        0.948        0.958
-    8   1197      86    0.919 0.00449        0.910        0.928
-   12     48      20    0.888 0.00867        0.871        0.905
-
-
-
-
-
-
-

-
-
-
-
-
-
-

2.3.2 tx.type (M)

-
-
-
-
-

-
-
-
-
-
-
-
-

2.4 Cox Model

-
-

2.4.1 death Distribution (?)

-
-
-
-
-

-
-
-
-
-
-
-

2.4.2 Overall (L)

-
-
-
- -
-
-Note -
-
-
-

skip overall, jump directly to tx.type

-
-
-
-
-
# A tibble: 7 × 5
-  fu.interval n.censored n.event n.at.risk hazard.rate
-        <dbl>      <int>   <int>     <dbl>       <dbl>
-1       0.333        830     152      9775     0.0155 
-2       0.667        491      37      8793     0.00421
-3       1            613      27      8265     0.00327
-4       2           1164      62      7625     0.00813
-5       3           1143      41      6399     0.00641
-6       4           1053      40      5215     0.00767
-7       5            919      31      4122     0.00752
-
-
-
-
-

2.4.3 tx.type (M)

-
-
-

2.4.4 age (continuous) (L)

-
-
-

2.4.5 age (categorical) (M)

-
-
-

2.4.6 Full model (L)

-
-
-
Call:
-coxph(formula = Surv(follow.up, death) ~ hla.match + tx.type, 
-    data = dat)
-
-  n= 9541, number of events= 451 
-   (234 observations deleted due to missingness)
-
-                  coef exp(coef) se(coef)      z Pr(>|z|)   
-hla.match1    -0.08845   0.91535  0.17308 -0.511  0.60933   
-hla.match2    -0.36340   0.69531  0.17919 -2.028  0.04256 * 
-hla.match3    -0.50810   0.60164  0.18298 -2.777  0.00549 **
-hla.match4    -0.65410   0.51991  0.22221 -2.944  0.00324 **
-hla.match5    -0.39445   0.67405  0.29410 -1.341  0.17985   
-hla.match6    -0.51491   0.59755  0.31489 -1.635  0.10201   
-tx.typeLiving  0.39049   1.47771  0.12688  3.078  0.00209 **
----
-Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
-
-              exp(coef) exp(-coef) lower .95 upper .95
-hla.match1       0.9153     1.0925    0.6520    1.2850
-hla.match2       0.6953     1.4382    0.4894    0.9879
-hla.match3       0.6016     1.6621    0.4203    0.8612
-hla.match4       0.5199     1.9234    0.3363    0.8037
-hla.match5       0.6740     1.4836    0.3787    1.1996
-hla.match6       0.5976     1.6735    0.3224    1.1077
-tx.typeLiving    1.4777     0.6767    1.1524    1.8949
-
-Concordance= 0.613  (se = 0.014 )
-Likelihood ratio test= 56.79  on 7 df,   p=7e-10
-Wald test            = 57.08  on 7 df,   p=6e-10
-Score (logrank) test = 59.51  on 7 df,   p=2e-10
-
-
-
Call:
-coxph(formula = Surv(follow.up, death) ~ hla.match * tx.type, 
-    data = dat)
-
-  n= 9541, number of events= 451 
-   (234 observations deleted due to missingness)
-
-                            coef exp(coef) se(coef)      z Pr(>|z|)  
-hla.match1                0.8094    2.2467   0.8165  0.991   0.3215  
-hla.match2                1.1371    3.1176   0.7530  1.510   0.1311  
-hla.match3                0.4217    1.5245   0.7140  0.591   0.5548  
-hla.match4                0.1094    1.1156   0.7341  0.149   0.8816  
-hla.match5                0.4754    1.6087   0.7820  0.608   0.5432  
-hla.match6                0.5836    1.7925   0.8021  0.728   0.4669  
-tx.typeLiving             1.3801    3.9752   0.7218  1.912   0.0559 .
-hla.match1:tx.typeLiving -0.9614    0.3823   0.8355 -1.151   0.2498  
-hla.match2:tx.typeLiving -1.6578    0.1905   0.7762 -2.136   0.0327 *
-hla.match3:tx.typeLiving -1.0096    0.3644   0.7461 -1.353   0.1760  
-hla.match4:tx.typeLiving -0.5175    0.5960   0.7863 -0.658   0.5105  
-hla.match5:tx.typeLiving -0.8601    0.4231   0.8805 -0.977   0.3286  
-hla.match6:tx.typeLiving -1.3342    0.2634   0.9113 -1.464   0.1432  
----
-Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
-
-                         exp(coef) exp(-coef) lower .95 upper .95
-hla.match1                  2.2467     0.4451   0.45341   11.1322
-hla.match2                  3.1176     0.3208   0.71257   13.6400
-hla.match3                  1.5245     0.6559   0.37617    6.1787
-hla.match4                  1.1156     0.8964   0.26460    4.7032
-hla.match5                  1.6087     0.6216   0.34737    7.4498
-hla.match6                  1.7925     0.5579   0.37212    8.6342
-tx.typeLiving               3.9752     0.2516   0.96600   16.3580
-hla.match1:tx.typeLiving    0.3823     2.6155   0.07435    1.9662
-hla.match2:tx.typeLiving    0.1905     5.2480   0.04162    0.8723
-hla.match3:tx.typeLiving    0.3644     2.7444   0.08443    1.5726
-hla.match4:tx.typeLiving    0.5960     1.6778   0.12763    2.7836
-hla.match5:tx.typeLiving    0.4231     2.3635   0.07533    2.3763
-hla.match6:tx.typeLiving    0.2634     3.7971   0.04414    1.5712
-
-Concordance= 0.615  (se = 0.014 )
-Likelihood ratio test= 66.78  on 13 df,   p=3e-09
-Wald test            = 65.9  on 13 df,   p=5e-09
-Score (logrank) test = 69.92  on 13 df,   p=8e-10
-
-
-
Analysis of Deviance Table
- Cox model: response is  Surv(follow.up, death)
- Model 1: ~ hla.match + tx.type
- Model 2: ~ hla.match * tx.type
-   loglik  Chisq Df Pr(>|Chi|)
-1 -3848.5                     
-2 -3843.5 9.9891  6     0.1251
-
-
-
-
-

2.4.7 Discussion: include year or not?

-
-
-
-

2.5 Assumption Testing

-
-
-
-

3 Conclusion (M + L)

-