diff --git a/publish.sh b/publish.sh
index 04c97b0..f319fd5 100644
--- a/publish.sh
+++ b/publish.sh
@@ -19,6 +19,6 @@ git add -A
git commit -m "Deploy static site"
# Push this folder as the gh-pages branch of chmee/kidney
-git push -f git@github.com:chmlee/kidney.git master:gh-pages
+git push -f git@github.com:chmee/kidney.git main:gh-pages
cd -
diff --git a/report.Rmd b/report.Rmd
index f51fe1b..89e97dc 100644
--- a/report.Rmd
+++ b/report.Rmd
@@ -14,7 +14,6 @@ library(survival)
library(emmeans)
library(foreign)
library(gtsummary)
-library(ggsurvfit)
```
```{r}
@@ -115,29 +114,6 @@ table.split <- dat.table1 |>
table.split
```
-```{r}
-table.split <- dat.table1 |>
- select(sex, age.rec, age.donor, hla.match, cold.isc, tx.type) |>
- tbl_summary(
- by = tx.type,
- statistic = list(
- all_continuous() ~ "{median} ({p25}, {p75})"
- ),
- label = list(
- hla.match ~ "HLA matches, n(%)",
- age.donor ~ "Donor age, median (IQR)",
- age.rec ~ "Recipient age, median (IQR)",
- cold.isc ~ "Cold ischemic time (hours), median (IQR), ",
- sex ~ "Sex, n(%)"
- ),
- missing = "ifany"
- ) |>
- add_overall() |>
- modify_footnote(all_stat_cols() ~ NA)
-
-table.split
-```
-
Calculate median follow-up (reverse Kaplan-Meier method), with 95% CI
```{r}
@@ -391,6 +367,9 @@ recode.dat <- function(dat, time.intervals) {
df
}
+
+
+
```
```{r}
@@ -546,25 +525,29 @@ recode.age <- function(dat, ages) {
dat.age.rec <- dat[!is.na(dat$age.rec), ]
class.age <- function(age) {
if (age < 2) {
- return("0-1")
+ return("0,1")
} else if (age < 6) {
- return("2-5")
+ return("2,3,4,5")
} else if (age < 11) {
- return("6-10")
+ return("6,7,8,9,10")
} else if (age < 19) {
return("11-18")
}
}
+```
+
+```{r}
dat.age.rec$age.group <- sapply(dat.age.rec$age.rec, class.age) |>
factor(levels = c(
- "0-1",
- "2-5",
- "6-10",
+ "0,1",
+ "2,3,4,5",
+ "6,7,8,9,10",
"11-18"
))
cox.age.rec <- coxph(Surv(follow.up, death) ~ age.group, data = dat.age.rec)
+cox.age.rec |> summary()
```
```{r}
@@ -572,7 +555,6 @@ emmeans(cox.age.rec, pairwise ~ age.group, type = "response")
```
-
# Exercise 7
Fit a multivariate Cox model by using other predictors and
@@ -699,7 +681,6 @@ H1: PH-assumption doesn't hold
# different cox model!!
cox.final <- coxph(Surv(follow.up, death) ~ tx.type + age.rec + hla.match + age.donor, data = dat)
cox.zph(cox.final)
-cox.final
```
@@ -710,11 +691,6 @@ A non-random pattern or slope in a plot of scaled residuals against time indicat
plot(cox.zph(cox.final))
```
-```{r, fig.width=10, fig.height=10}
-plot(cox.zph(cox.final))
-```
-
-
NOTES:
diff --git a/slides.Rmd b/slides.Rmd
deleted file mode 100644
index 024b4b4..0000000
--- a/slides.Rmd
+++ /dev/null
@@ -1,1233 +0,0 @@
----
-title: Slides
-execute:
- cache: true
- freeze: auto
- include: true
- echo: false
- warning: false
-number-sections: true
----
-
-```{r}
-library(tidyverse)
-library(dplyr)
-library(ggplot2)
-library(survival)
-library(emmeans)
-library(foreign)
-library(gtsummary)
-library(gt)
-library(ggsurvfit)
-library(gridExtra)
-library(survminer)
-library(viridis)
-```
-
-
-```{r}
-dat <- read.csv("./unos.txt", sep = "\t")
-names(dat) <- c("hla.match", "age.donor", "age.rec", "cold.isc", "death",
- "year", "sex", "tx.type", "follow.up")
-dat <- dat |>
- mutate(
- sex = factor(sex, levels = c(0,1), labels = c("Female","Male")),
- tx.type = factor(tx.type, levels = c(0,1), labels = c("Cadaveric","Living")),
- hla.match = factor(hla.match),
- year = factor(year)
- )
-```
-
-```{r}
-my.theme <- theme_minimal() +
- theme(
- legend.position = "bottom",
- plot.title = element_text(size = 17, face = "bold"),
- axis.title = element_text(size = 15),
- axis.text = element_text(size = 12),
- legend.text = element_text(size = 12),
- plot.margin = margin(20, 30, 20, 30)
- )
-```
-
-::: {.callout-warning}
-There are 25 repondents with `follow.up = 0`, 21 of which with `death = 0` and
-other 4 with `death = 1`.
-
-```{r, echo=TRUE}
-dat[dat$follow.up == 0, ] |>
- group_by(death) |>
- summarise(n = n())
-```
-
-Reasons to omit them, that is, have `dat <- dat[dat$follow.up > 0,]` at the
-very beginning:
-
-- If `death = 1`, transplant failed. They shouldn't be included in the research
- question "survival rate of a *successful* tranplant".
-- If `death = 0`, transplant was successful, but they were immediately removed
- from populatoin at risk, contributiong nothing to the analysis.
-:::
-
-# Title
-
-# Table of Contents (?)
-
-# Introduction (M)
-
-# Method & Result
-
-## Identify Predictors (M)
-
-- Data source
-- Study population
-```{r}
-nrow(dat)
-```
-- Covariates
-- Outcome
-```{r}
-names(dat)
-```
-
-## Table (L)
-
-```{r}
-dat |>
- select(sex, age.rec, age.donor, hla.match, cold.isc, tx.type) |>
- tbl_summary(
- by = tx.type,
- statistic = list(
- all_continuous() ~ "{median} ({p25}, {p75})"
- ),
- label = list(
- hla.match ~ "HLA matches, n(%)",
- age.donor ~ "Donor age, median (IQR)",
- age.rec ~ "Recipient age, median (IQR)",
- cold.isc ~ "Cold ischemic time (hours), median (IQR), ",
- sex ~ "Sex, n(%)"
- ),
- missing = "ifany"
- ) |>
- add_overall() |>
- modify_footnote(all_stat_cols() ~ NA)
-```
-
-# Elementary Analysis: Transplant Type (L)
-
-## Transplant Type vs Sex
-
-`sex` are similar across `tx.type`
-
-```{r, fig.width=7, fig.height=5}
-plot.tx.sex <- dat |>
- mutate(Overall = "Overall") |>
- pivot_longer(
- cols = c(tx.type, Overall),
- names_to = "Attribute",
- values_to = "Category"
- ) |>
- mutate(
- Attribute = recode(
- Attribute,
- "tx.type" = "Transplant Type"
- )
- ) |>
- count(Attribute, Category, sex) |>
- group_by(Attribute, Category) |>
- mutate(
- total = sum(n),
- percent = n / total
- ) |>
- ungroup() |>
- ggplot(aes(x = Category, y = percent, fill = sex)) +
- geom_col(position = "dodge") +
- facet_grid(
- ~ Attribute,
- scales = "free_x",
- space = "free_x"
- ) +
- labs(
- title = "Percentage of Sex by Transplant Type",
- x = "Group",
- y = "Percentage",
- fill = NULL
- ) +
- scale_x_discrete(
- labels = c("Cadaveric" = "Deceased")
- ) +
- my.theme
-plot.tx.sex
-```
-
-## Transplant Type vs Recipient Age
-
-```{r, fig.width=7, fig.height=5}
-plot.tx.rec <- ggplot(data = dat) +
- geom_density(aes(x = age.rec, color = tx.type)) +
- geom_density(
- aes(x = age.rec, color = "Overall"),
- linewidth = 0.5,
- linetype = "dashed"
- ) +
- scale_color_manual(
- values = c(
- "Cadaveric" = "darkorange",
- "Living" = "darkgreen",
- "Overall" = "gray50"
- ),
- labels = c(
- "Cadaveric" = "Deceased"
- )
- ) +
- labs(
- title = "Density of Recipient Age by Transplant Type",
- x = "Recipient Age",
- y = "Density",
- color = NULL
- ) +
- my.theme
-plot.tx.rec
-```
-
-
-# Elementary Analysis: Transplant Type (Cont.)
-
-## Transplant Type vs HLA Match
-
-Living donors have less `hla.match` then cadaveric donors.
-
-```{r, fig.width=7, fig.height=5}
-ggplot(data = dat) +
- geom_bar(aes(x = hla.match)) +
- facet_wrap(tx.type ~ ., ncol = 1) +
- labs(
- title = "Distribution of HLA Match by Transplant Type",
- x = "HLA match",
- y = "Count",
- color = NULL
- ) +
- my.theme
-
-```
-
-## Transplant Type vs Donor Age
-
-```{r, fig.width=7, fig.height=5}
-plot.tx.donor <- ggplot(data = dat) +
- geom_density(aes(x = age.donor, color = tx.type)) +
- geom_density(
- aes(x = age.donor, color = "Overall"),
- linewidth = 0.5,
- linetype = "dashed"
- ) +
- scale_color_manual(
- values = c(
- "Cadaveric" = "darkorange",
- "Living" = "darkgreen",
- "Overall" = "gray50"
- ),
- labels = c(
- "Cadaveric" = "Deceased"
- )
- ) +
- labs(
- title = "Density of Donor Age by Transplant Type",
- x = "Recipient Age",
- y = "Density",
- color = NULL
- ) +
- my.theme
-
-plot.tx.donor
-```
-
-# Overall Kaplan-Meier Curve
-
-```{r}
-km_all <- survfit(Surv(follow.up, death) ~ 1, data = dat)
-```
-
-```{r, fig.width=7, fig.height=5}
-km_all |>
- ggsurvfit(type = "survival") +
- add_confidence_interval() +
- scale_y_continuous(limits = c(0, 1)) +
- labs(
- title = "Kaplan-Meier Curve for Overall Survival",
- x = "Years of Follow-up",
- y = "Survival Probability",
- ) +
- my.theme
-```
-
-```{r}
-summary(km_all, times = c(0, 4, 8, 12))
-```
-
-# Cox Model (with intervals?)
-
-# Cox Model: Transplant Type
-
-```{r}
-cox.tx <- coxph(Surv(follow.up, death) ~ tx.type, data = dat)
-summary(cox.tx)
-```
-
-```{r}
-km.tx <- survfit(Surv(follow.up, death) ~ tx.type, data = dat)
-```
-
-```{r, fig.width=7, fig.height=5}
-km.tx |>
- ggsurvfit(type = "survival") +
- add_confidence_interval() +
- scale_color_discrete(
- labels = c("Deceased", "Living")
- ) +
- scale_fill_discrete(
- labels = c("Deceased", "Living")
- ) +
- labs(
- title = "Survival by Donor Type",
- x = "Years of Follow-up",
- y = "Survival Probability",
- color = "Donor Type",
- fill = "Donor Type"
- ) +
- my.theme
-```
-
-```{r}
-summary(km.tx, times = c(0, 4, 8, 12))
-```
-
-# Cox Model: Age Recipient (Continuous)
-
-```{r}
-cox.age.rec <- coxph(Surv(follow.up, death) ~ age.rec, data = dat)
-summary(cox.age.rec)
-```
-
-```{r}
-km.age.rec <- survfit(Surv(follow.up, death) ~ age.rec, data = dat)
-```
-
-```{r, fig.width=7, fig.height=7}
-km.age.rec |>
- ggsurvfit(type = "survival") +
- scale_color_discrete(
- labels = seq(1, 18)
- ) +
- labs(
- title = "Survival by Recipient Age",
- x = "Years of Follow-up",
- y = "Survival Probability",
- color = "Recipient Age",
- ) +
- my.theme
-```
-
-
-View life table
-
-```{r}
-summary(km.age.rec, times = c(0, 4, 8, 12))
-```
-
-
-
-# Cox Model: Age Recipient (Categorical)
-
-```{r}
-class.age <- function(age) {
- if (is.na(age)) return(NA)
- if (age < 2) {
- return("0-1")
- } else if (age < 6) {
- return("2-5")
- } else if (age < 11) {
- return("6-10")
- } else if (age < 19) {
- return("11-18")
- }
-}
-
-dat.age.rec.group <- dat
-dat.age.rec.group$age.rec.group = sapply(dat$age.rec, class.age) |>
- factor(levels = c(
- "0-1",
- "2-5",
- "6-10",
- "11-18"
- ))
-cox.age.rec.group <- coxph(Surv(follow.up, death) ~ age.rec.group,
- data = dat.age.rec.group)
-```
-
-
-```{r, echo=TRUE}
-cox.age.rec.group
-```
-
-
-
-
-```{r, echo=TRUE}
-eme.age.rec.group <- emmeans(cox.age.rec.group, pairwise ~ age.rec.group,
- type = "response")
-eme.age.rec.group
-```
-
-
-```{r}
-km.age.rec.group <- survfit(Surv(follow.up, death) ~ age.rec.group,
- data = dat.age.rec.group)
-```
-
-```{r, fig.width=7, fig.height=7}
-km.age.rec.group |>
- ggsurvfit(type = "survival") +
- add_confidence_interval() +
- scale_color_discrete(
- labels = c("0-1", "2-5", "6-10", "11-18")
- ) +
- scale_fill_discrete(
- labels = c("0-1", "2-5", "6-10", "11-18")
- ) +
- labs(
- title = "Survival by Recipient Age Group",
- x = "Years of Follow-up",
- y = "Survival Probability",
- color = "Recipient Age Group",
- fill = "Recipient Age Group",
- ) +
- my.theme
-```
-
-```{r, fig.width=7, fig.height=7}
-eme.age.rec.group$contrast |>
- as.data.frame() |>
- select(contrast, ratio, SE, p.value) |>
- mutate(
- contrast = factor(contrast),
- log.ratio = log(ratio),
- SE.log = SE / ratio,
- low = exp(log.ratio - 1.96 * SE.log),
- up = exp(log.ratio + 1.96 * SE.log)
- ) |>
- ggplot(aes(y = reorder(contrast, ratio))) +
- geom_errorbarh(aes(xmin = low, xmax = up)) +
- geom_point(aes(x = ratio)) +
- geom_vline(xintercept = 1, linetype = "dashed") +
- scale_y_discrete(
- labels = function(x) {
- gsub("[()]", "", x = x)
- }
- ) +
- labs(
- x = "Hazard Ratio",
- y = "Recipient Age Group Comparison",
- title = "Pairwise Hazard Ratios by Recipient Age Groups"
- ) +
- my.theme
-```
-
-# Identifying Predictors
-
-TODO: write a more generic function
-
-```{r}
-Waldtest <- function(cox.fit, i.betas){
- q <- length(i.betas)
- coefs <- cox.fit$coefficients
- var <- cox.fit$var[i.betas, i.betas]
- Wald <- coefs[i.betas]%*%solve(var)%*%coefs[i.betas]
- p.value <- 1-pchisq(as.numeric(Wald), df = q)
- print(p.value)
- return(p.value)
-}
-```
-
-```{r}
-vars <- c("hla.match", "age.donor", "age.rec", "cold.isc", "year", "sex",
- "tx.type")
-included <- vars == "tx.type"
-included <- c(FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, TRUE)
-included <- c(FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, TRUE)
-
-find.best.surv <- function(included) {
- print('---')
- test.vars <- vars[!included]
- p.values <- sapply(test.vars, function(test.var) {
- new.surv <- reformulate(
- termlabels = c(vars[included], test.var),
- response = "Surv(follow.up, death)"
- )
- fit.tx <- coxph(new.surv, data = dat)
- Waldtest(fit.tx, sum(included))
- })
- if (sum(p.values > 0.05) == sum(included)) {
- print(3)
- return(included)
- }
- if (sum(included == TRUE) == length(included)) {
- print(2)
- return(included)
- }
- print(1)
- print(which.min(p.values))
- print(included)
- print(p.values)
- print(which.min(p.values))
- included[which.min(p.values)] <- TRUE
- print(included)
- # find.best.surv(included)
-}
-
-
-find.best.surv(included)
-
-```
-
-```{r}
-surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match
-fit.cox.model(surv, 3)
-surv <- Surv(follow.up, death) ~ tx.type + age.rec + age.donor
-fit.cox.model(surv, 3)
-surv <- Surv(follow.up, death) ~ tx.type + age.rec + cold.isc
-fit.cox.model(surv, 3)
-surv <- Surv(follow.up, death) ~ tx.type + age.rec + year
-fit.cox.model(surv, 3)
-surv <- Surv(follow.up, death) ~ tx.type + age.rec + sex
-fit.cox.model(surv, 3)
-```
-
-
-```
-
----
-
-
-# Old
-
-
-
-## Death & Censored
-
-
-```{r, echo=TRUE}
-dat |>
- group_by(death) |>
- summarise(
- n = n(),
- percentage = n() / nrow(dat) * 100
- )
-```
-
-Most of the patients (95.24%) were censored.
-
-```{r}
-n.pop <- nrow(dat)
-dat.accum <- dat[dat$follow.up > 0,] |>
- select(follow.up, death) |>
- mutate(censored = ifelse(death == 1, 0, 1)) |>
- group_by(follow.up) |>
- summarize(
- death = sum(death),
- censored = sum(censored),
- .groups = "drop"
- ) |>
- arrange(follow.up) |>
- mutate(
- death.accum = cumsum(death),
- censored.accum = cumsum(censored),
- event.accum = death.accum + censored.accum,
- n.at.risk = n.pop - c(0, event.accum[-n()])
- )
-dat.accum |>
- pivot_longer(
- cols = c(death.accum, censored.accum, n.at.risk),
- names_to = "type",
- values_to = "accum",
- ) |>
- ggplot(aes(x = follow.up, y = accum, color = type)) +
- geom_step(linewidth = 1, direction = "hv") +
- my.theme
-```
-
-```{r}
-dat[dat$death == 1,] |>
- ggplot(aes(x = follow.up)) +
- geom_histogram(bins = 50)
-```
-
-
-```{r}
-dat[dat$follow.up == 0,]
-```
-
-```{r}
-head(dat)
-```
-
-## Cox Model - Age as a Continuous Variable
-
-```{r, echo=TRUE}
-cox.age.rec <- coxph(Surv(follow.up, death) ~ age.rec, data = dat)
-cox.age.rec |> summary()
-```
-
-## Cox Model - Age as a Categorical Variable
-
-
-```{r, fig.width=10, fig.height=10}
-cox.final <- coxph(Surv(follow.up, death) ~ tx.type + age.rec + hla.match + age.donor + year, data = dat)
-cox.zph(cox.final)
-plot(cox.zph(cox.final))
-
-ggcoxzph(cox.zph(cox.final),
- ggtheme = theme_minimal() +
- theme(
- legend.position = "bottom",
- plot.title = element_text(size = 17, face = "bold"),
- axis.title = element_text(size = 15),
- axis.text = element_text(size = 12),
- legend.text = element_text(size = 12),
- strip.text = element_blank(),
- plot.margin = margin(20, 30, 20, 30)
- ))
-```
-
-## Table 1 (L)
-
-
-### HLA match `hla.match`
-
-```{r}
-dat$hla.match |> table(useNA = "always")
-```
-
-```{r}
-ggplot(dat, aes(x = hla.match)) +
- geom_bar() +
- labs(x = "HLA match", y = "Count") +
- theme_minimal()
-
-```
-
-```{r}
-ggplot(dat, aes(x = hla.match, fill = tx.type)) +
- geom_bar(position = "dodge") +
- labs(x = "HLA match", y = "Count") +
- theme_minimal()
-```
-
-::: {.callout-warning}
-TODO: change to percentage within each `tx.type`
-:::
-
-::: {.callout-note}
-Living donors have less `hla.match` then cadaveric donors
-:::
-
-
-### Donor age `age.donor`
-
-```{r}
-#| echo: true
-mean(dat$age.donor, na.rm = TRUE)
-median(dat$age.donor, na.rm = TRUE)
-```
-
-```{r}
-ggplot(dat, aes(x = age.donor)) +
- geom_bar() +
- labs(x = "Donor Age", y = "Count") +
- theme_minimal()
-```
-
-```{r, fig.width = 8, fig.height = 12}
-ggplot(dat, aes(x = age.donor)) +
- geom_bar() +
- labs(x = "Donor Age", y = "Count") +
- theme_minimal() +
- facet_grid(hla.match ~ .)
-```
-
-```{r, fig.width= 8, fig.height= 12}
-ggplot(dat, aes(x = age.donor, color = tx.type)) +
- geom_density() +
- labs(x = "Donor Age", y = "Density") +
- theme_minimal() +
- facet_grid(hla.match ~ .)
-```
-
-```{r}
-ggplot(dat, aes(x = age.donor, color = hla.match)) +
- geom_density() +
- labs(x = "Donor Age", y = "Count") +
- theme_minimal()
-```
-
-```{r}
-ggplot(dat, aes(x = as.factor(tx.type), y = age.donor)) +
- geom_boxplot() +
- theme_minimal()
-```
-
-::: {.callout-note}
-There is age difference between different `tx.type`
-:::
-
-
-### Recipient Age `age.rec`
-
-```{r}
-#| echo: true
-mean(dat$age.rec, na.rm = TRUE)
-median(dat$age.rec, na.rm = TRUE)
-```
-
-
-```{r}
-ggplot(dat, aes(x = age.rec)) +
- geom_bar() +
- labs(x = "Recipient Age", y = "Count") +
- theme_minimal()
-
-```
-
-```{r}
-ggplot(dat, aes(x = as.factor(age.rec), y = age.donor)) +
- geom_boxplot() +
- theme_minimal()
-
-```
-
-```{r}
-ggplot(dat, aes(x = age.rec, fill = tx.type)) +
- geom_bar(position = "dodge") +
- theme_minimal()
-
-```
-
-```{r}
-ggplot(dat, aes(x = age.rec, color = hla.match)) +
- geom_density() +
- theme_minimal()
-```
-
-```{r}
-ggplot(dat, aes(x = as.factor(age.rec), y = cold.isc)) +
- geom_boxplot() +
- theme_minimal()
-```
-
-
-### Cold Ischemia Time `cold.isc`
-
-```{r}
-#| echo: true
-mean(dat$cold.isc, na.rm = TRUE)
-median(dat$cold.isc, na.rm = TRUE)
-```
-
-```{r}
-ggplot(dat, aes(x = cold.isc)) +
- geom_density() +
- labs(x = "Cold Ischemia Time (hours)", y = "Probability Density") +
- theme_minimal()
-```
-
-```{r}
-ggplot(dat, aes(x = cold.isc, color = tx.type, group = tx.type)) +
- geom_density() +
- labs(x = "Cold Ischemia Time (hours)", y = "Probability Density") +
- theme_minimal()
-```
-
-```{r}
-ggplot(dat, aes(x = cold.isc)) +
- geom_density() +
- theme_minimal() +
- facet_grid(tx.type ~ .)
-```
-
-### Transplant Type `tx.type`
-
-```{r}
-dat$tx.type |> table(useNA = "always")
-```
-
-```{r}
-#| echo: true
-dat$tx.type |> table()
-```
-
-```{r}
-ggplot(dat, aes(x = tx.type)) +
- geom_bar() +
- labs(x = "Transplant Type", y = "Count") +
- theme_minimal()
-
-```
-
-### Year `year`
-
-```{r}
-dat$year |> table()
-```
-
-```{r}
-ggplot(dat, aes(x = year)) +
- geom_bar() +
- labs(x = "Year", y = "Count") +
- theme_minimal()
-```
-
-```{r}
-ggplot(dat, aes(x = year, y = follow.up)) +
- geom_boxplot()
-```
-
-```{r}
-ggplot(dat, aes(x = year, fill = tx.type)) +
- geom_bar(position = "dodge")
-```
-
-```{r}
-ggplot(dat, aes(x = year, y = age.rec)) +
- geom_boxplot()
-```
-
-```{r}
-ggplot(dat, aes(x = year, y = age.donor)) +
- geom_boxplot()
-```
-
-## Kaplan-Meier
-
-### Overall (L)
-
-```{r}
-km_all <- survfit(Surv(follow.up, death) ~ 1, data = dat)
-summary(km_all, times = c(4, 8, 12))
-```
-
-```{r}
-km_all |>
- ggsurvfit(type = "survival") +
- add_confidence_interval() +
- scale_y_continuous(limits = c(0, 1), labels = scales::label_percent()) +
- labs(
- x = "Years of Follow-up",
- y = "Overall Survival Probability"
- ) +
- theme_minimal()
-```
-
-### `tx.type` (M)
-
-```{r, fig.width=7, fig.height=5}
-km.tx <- survfit(Surv(follow.up, death) ~ tx.type, data = dat)
-
-km.tx |>
- ggsurvfit(type = "survival") +
- add_confidence_interval() +
- scale_x_continuous(
- breaks = seq(0, 20, by = 4)
- ) +
- scale_y_continuous(
- limits = c(0, 1),
- breaks = seq(0, 1, by = 0.2),
- labels = scales::label_number(accuracy = 0.1)
- ) +
- labs(
- x = "Years of Follow-up",
- y = "Overall Survival Probability"
- ) +
- theme_minimal() +
- theme(
- legend.position = "bottom",
- plot.title = element_text(size = 17, face = "bold"),
- axis.title = element_text(size = 15),
- axis.text = element_text(size = 10),
- legend.text = element_text(size = 12),
- plot.margin = margin(20, 30, 20, 30)
- )
-```
-
-## Cox Model
-
-```{r}
-get.life.table <- function(dat, time.intervals) {
- n.pop <- nrow(dat)
-
- dat |>
- recode.dat(time.intervals) |>
- group_by(fu.interval) |>
- summarize(
- n.censored = sum(.data$death == 0),
- n.event = sum(.data$death),
- ) |>
- ungroup() |>
- calculate.hazard(n.pop)
-}
-
-get.life.table.by.groups <- function(dat, time.intervals, grps) {
- grps |>
- lapply(function(grp) {
- dat |>
- get.life.table.by.group(time.intervals, grp) |>
- mutate(
- grp.name = grp,
- grp.value = pick(1)[[1]]
- ) |>
- select(-1)
- }) |>
- bind_rows()
-}
-
-get.life.table.by.group <- function(dat, time.intervals, grp) {
- dat |>
- recode.dat(time.intervals) |>
- group_by(fu.interval, .data[[grp]]) |>
- summarize(
- n.censored = sum(.data$death == 0),
- n.event = sum(.data$death),
- .groups = "keep"
- ) |>
- ungroup(fu.interval) |>
- group_modify(function(df.sub, grp) {
- grp.name <- names(grp)
- grp.value <- grp[[1]]
- n.pop <- (dat[[grp.name]] == grp.value) |> sum()
- calculate.hazard(df.sub, n.pop)
- }) |>
- ungroup()
-}
-
-calculate.hazard <- function(life.table, n.pop) {
- n.removed <- life.table$n.event + life.table$n.censored
- n.removed.accum <- c(0, cumsum(n.removed)[-length(n.removed)])
- life.table |>
- mutate(
- n.at.risk = n.pop - n.removed.accum,
- # TODO: how to account for censored? How do we adjust for uneven interval?
- hazard.rate = n.event / n.at.risk
- )
-}
-
-recode.dat <- function(dat, time.intervals) {
- df <- dat[dat$follow.up <= sum(time.intervals), ]
- time.points <- cumsum(time.intervals)
- df$fu.interval <- sapply(df$follow.up, function(time) {
- time.points[time <= time.points][1]
- })
-
- df
-}
-```
-
-### `death` Distribution (?)
-
-```{r}
-# dat |>
-# mutate(
-# accum.death = cumsum(death),
-# accum.censored = if_else(death == 1, 0, 1) |> cumsum()
-# ) |>
-# ggplot() +
-# geom_pont(aes(x = follow.up, y = accum.death, color = death))
-```
-
-```{r}
-
-```
-
-```{r}
-plot.death = dat[dat$death == 1,] |>
- ggplot(aes(x = follow.up)) +
- geom_histogram(bins = 50)
-plot.death
-```
-
-
-### Overall (L)
-
-::: {.callout-note}
-skip overall, jump directly to `tx.type`
-:::
-
-```{r}
-time.intervals <- c(1/3, 1/3, 1/3, 1, 1, 1, 1)
-get.life.table(dat, time.intervals)
-```
-
-### `tx.type` (M)
-
-
-### `age` (continuous) (L)
-
-### `age` (categorical) (M)
-
-### Full model (L)
-
-```{r}
-m1 <- coxph(Surv(follow.up, death) ~ hla.match + tx.type, data = dat)
-summary(m1)
-
-m2 <- coxph(Surv(follow.up, death) ~ hla.match * tx.type, data = dat)
-summary(m2)
-
-anova(m1, m2, test = "LRT")
-```
-
-### Discussion: include `year` or not?
-
-## Assumption Testing
-
-
- ## Why Survival Analysis (L)
-
-Motivation: study the distribution of time to event $T$.
-
-Example: time of death after kidney transplant.
-
-
-```{r}
-ex <- data.frame(
- id = c(1, 2, 3, 4, 5),
- transplant = c(2000, 2000, 2001, 2003, 2004),
- death = c(2005, 2009, 2005, 2004, 2016)
-)
-
-ex_table_1 <- ex |>
- gt() |>
- cols_label(
- id = "ID",
- transplant = "Transplant",
- death = "Death"
- ) %>%
- cols_align(align = "center", columns = everything())
-
-ex_plot_real_time <- ggplot(ex) +
- geom_segment(aes(x = transplant, xend = death, y = factor(id),
- yend = factor(id)), color = "grey50", linewidth = 1) +
- geom_point(aes(x = transplant, y = factor(id), color = "trans"), size = 3) +
- geom_point(aes(x = death, y = factor(id), color = "death"), size = 3) +
- scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D")) +
- scale_y_discrete(limits = rev) +
- labs(x = "Year", y = "Subject ID", color = "Event") +
- theme_minimal()
-```
-
-```{r}
-ex_table_1
-ex_plot_real_time
-```
-
----
-
-We then calculate time to death for each respondents.
-
-With this, all we need is to fit a linear model `t ~ X` or `log(t) ~ X`
-
-```{r}
-ex_t <- ex |>
- mutate(
- t = death - transplant
- )
-
-ex_table_2 <- ex_t |>
- gt() |>
- cols_label(
- id = "ID",
- transplant = "Transplant",
- death = "Death",
- t = "Time to death"
- ) |>
- cols_align(align = "center", columns = everything())
-
-ex_plot_uniform_time <- ggplot(ex_t) +
- geom_segment(aes(x = 0, xend = t, y = factor(id),
- yend = factor(id)), color = "grey50", linewidth = 1) +
- geom_point(aes(x = 0, y = factor(id), color = "trans"), size = 3) +
- geom_point(aes(x = t, y = factor(id), color = "death"), size = 3) +
- scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D")) +
- scale_y_discrete(limits = rev) +
- labs(x = "Year", y = "Subject ID", color = "Event") +
- theme_minimal()
-
-```
-
-
-```{r}
-ex_table_2
-ex_plot_uniform_time
-```
-
----
-
-What if we do not know when exactly some respondents die?
-
-Scenario 1: the study ends at the year 2008?
-
-```{r}
-ex_t_c <- ex_t |>
- mutate(
- death_censored = if_else(death <= 2008, death, 2008),
- death_censored_txt = if_else(death <= 2008, as.character(death), "> 2008"),
- status = if_else(death <= 2008, 1, 0),
- t_1 = death_censored - transplant,
- t_1_txt = if_else(death <= 2008, as.character(t_1),
- paste(">", as.character(t_1)))
- )
-```
-
-```{r}
-ex_table_3 <- ex_t_c |>
- select(id, transplant, death_censored_txt, t_1_txt) |>
- gt() |>
- cols_label(
- id = "ID",
- transplant = "Transplant",
- death_censored_txt = "Death",
- t_1_txt = "Time to death"
- ) |>
- cols_align(align = "center", columns = everything())
-
-ex_plot_real_time_1 <- ggplot(ex_t_c) +
- geom_segment(aes(x = transplant, xend = death_censored, y = factor(id),
- yend = factor(id)), color = "grey50", linewidth = 1) +
- geom_point(aes(x = transplant, y = factor(id), color = "trans"), size = 3) +
- geom_point(aes(x = death_censored, y = factor(id), color =
- if_else(status == 1, "death", "censored")), size = 3) +
- scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D",
- "censored" = "orange")) +
- scale_y_discrete(limits = rev) +
- labs(x = "Year", y = "Subject ID", color = "Event") +
- xlim(2000, 2016) +
- geom_vline(xintercept = 2008, linetype = "dashed", color = "orange",
- linewidth = 0.8) +
- theme_minimal()
-
-
-ex_plot_uniform_time_1 <- ggplot(ex_t_c) +
- geom_segment(aes(x = 0, xend = t_1, y = factor(id),
- yend = factor(id)), color = "grey50", linewidth = 1) +
- geom_point(aes(x = 0, y = factor(id), color = "trans"), size = 3) +
- geom_point(aes(x = t_1, y = factor(id), color =
- if_else(status == 1, "death", "censored")), size = 3) +
- scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D",
- "censored" = "orange")) +
- scale_y_discrete(limits = rev) +
- labs(x = "Year", y = "Subject ID", color = "Event") +
- theme_minimal()
-```
-
-
-```{r}
-ex_table_3
-ex_plot_real_time_1
-ex_plot_uniform_time_1
-```
-
-**Right Censoring**: only observe the event (death) if it occurs before a
-certain time (2008).
-
----
-
-Scenario 2: respondent 3 move away; loss follow up
-
-```{r}
-ex_alt <- ex_t_c
-ex_alt$death_censored_txt[3] <- "> 2005"
-ex_alt$status[3] <- 0
-ex_alt$t_1_txt[3] <- "> 4"
-
-
-ex_table_4 <- ex_alt |>
- select(id, transplant, death_censored_txt, t_1_txt) |>
- gt() |>
- cols_label(
- id = "ID",
- transplant = "Transplant",
- death_censored_txt = "Death",
- t_1_txt = "Time to death"
- ) |>
- cols_align(align = "center", columns = everything())
-
-ex_plot_real_time_2 <- ggplot(ex_alt) +
- geom_segment(aes(x = transplant, xend = death_censored, y = factor(id),
- yend = factor(id)), color = "grey50", linewidth = 1) +
- geom_point(aes(x = transplant, y = factor(id), color = "trans"), size = 3) +
- geom_point(aes(x = death_censored, y = factor(id), color =
- if_else(status == 1, "death", "censored")), size = 3) +
- scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D",
- "censored" = "orange")) +
- scale_y_discrete(limits = rev) +
- labs(x = "Year", y = "Subject ID", color = "Event") +
- xlim(2000, 2016) +
- geom_vline(xintercept = 2008, linetype = "dashed", color = "orange",
- linewidth = 0.8) +
- theme_minimal()
-
-
-ex_plot_uniform_time_2 <- ggplot(ex_alt) +
- geom_segment(aes(x = 0, xend = t_1, y = factor(id),
- yend = factor(id)), color = "grey50", linewidth = 1) +
- geom_point(aes(x = 0, y = factor(id), color = "trans"), size = 3) +
- geom_point(aes(x = t_1, y = factor(id), color =
- if_else(status == 1, "death", "censored")), size = 3) +
- scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D",
- "censored" = "orange")) +
- scale_y_discrete(limits = rev) +
- labs(x = "Year", y = "Subject ID", color = "Event") +
- theme_minimal()
-```
-
-```{r}
-ex_table_4
-ex_plot_real_time_2
-ex_plot_uniform_time_2
-```
-
----
-
-How many patients are right censored?
-
-```{r}
-dat |>
- mutate(Overall = "Overall") |>
- pivot_longer(
- cols = c(Overall, sex, tx.type),
- names_to = "Attribute",
- values_to = "Category"
- ) |>
- count(Attribute, Category, death) |>
- ggplot(aes(x = Category, y = n, fill = factor(death))) +
- geom_col(position = "dodge") +
- facet_wrap(~ Attribute, scales = "free_x") +
- labs(x = "Group", y = "Count", fill = "Death Status") +
- theme_minimal()
-```
-
-## Challenges in Survival Analysis (L)
-
-- **Right Censoring**: only observe the event if it occurs before a certain
- time.
-- **Left Censoring**: event has occurred prior to the start of a research
- - Follow up every 3 years.
- - Event has occurred -> event happened sometime before follow up.
-- **Left Truncation**: delayed entry; respondents are included only if they
- survived long enough.
- - Start follow up with patients 100 days after the transplant
- - Patients dies within 100 day wouldn't be included in the dataset
-- **Right Truncation**: respondents are included only if they have already
- experienced the event.
- - Retrospective analysis from deceased patients between 1990 and 2000.
- - Patients not dead before 2000 are not included in the dataset.
-
-
diff --git a/slides.html b/slides.html
index 260d904..80b495b 100644
--- a/slides.html
+++ b/slides.html
@@ -2214,48 +2214,6 @@ window.Quarto = {
Method & Result
Explain Dataset (M)
-
-
-
-
-sex and recipient age are similar across tx.type
-Living donors have less hla.match then cadaveric donors
-There is age difference between different tx.type
-
-
-
-
- sex ~ tx.tpye
-
-
-
-
sex are similar across tx.type
-
-
-
-
-
-
-
-
-
-
-
-
Table 1 (L)
@@ -2272,7 +2230,7 @@ Note
-
+
@@ -2281,37 +2239,11 @@ Note
-
+
-
-
-
-
TODO: change to percentage within each tx.type
-
-
-
-
-
-
Living donors have less hla.match then cadaveric donors
-
-
Donor age age.donor
@@ -2333,7 +2265,7 @@ Note
-
+
@@ -2346,7 +2278,7 @@ Note
-
+
@@ -2359,15 +2291,28 @@ Note
-
+
+
+
+ Recipient Age age.rec
+
+
mean(dat$age.rec, na.rm = TRUE)
+
+
median(dat$age.rec, na.rm = TRUE)
+
+
-
Warning: Removed 113 rows containing non-finite outside the scale range
-(`stat_density()`).
+
Warning: Removed 9 rows containing non-finite outside the scale range
+(`stat_count()`).
@@ -2390,36 +2335,23 @@ Note
-
-
-
-
There is age difference between different tx.type
-
-
-
-
- Recipient Age age.rec
-
mean(dat$age.rec, na.rm = TRUE)
-
-
[1] 11.64653
+
+
Warning: Removed 9 rows containing non-finite outside the scale range
+(`stat_count()`).
+
+
+
+
+
+
-
median(dat$age.rec, na.rm = TRUE)
-
Warning: Removed 9 rows containing non-finite outside the scale range
-(`stat_count()`).
+(`stat_density()`).
@@ -2431,7 +2363,7 @@ Note
-
Warning: Removed 113 rows containing non-finite outside the scale range
+Warning: Removed 2250 rows containing non-finite outside the scale range
(`stat_boxplot()`).
@@ -2442,22 +2374,22 @@ Note
+
+
+ Cold Ischemia Time cold.isc
-
-
Warning: Removed 9 rows containing non-finite outside the scale range
-(`stat_count()`).
-
-
-
-
-
-
+
mean(dat$cold.isc, na.rm = TRUE)
+
+
median(dat$cold.isc, na.rm = TRUE)
+
-
Warning: Removed 9 rows containing non-finite outside the scale range
+Warning: Removed 2250 rows containing non-finite outside the scale range
(`stat_density()`).
@@ -2471,7 +2403,7 @@ Note
Warning: Removed 2250 rows containing non-finite outside the scale range
-(`stat_boxplot()`).
+(`stat_density()`).
@@ -2482,69 +2414,10 @@ Note
-
- Cold Ischemia Time cold.isc
-
-
mean(dat$cold.isc, na.rm = TRUE)
-
-
median(dat$cold.isc, na.rm = TRUE)
-
-
-
-
-
Warning: Removed 2250 rows containing non-finite outside the scale range
-(`stat_density()`).
-
-
-
-
-
-
-
-
-
-
-
-
Warning: Removed 2250 rows containing non-finite outside the scale range
-(`stat_density()`).
-
-
-
-
-
-
-
-
-
-
-
-
Warning: Removed 2250 rows containing non-finite outside the scale range
-(`stat_density()`).
-
-
-
-
-
-
-
-
-
-
Transplant Type tx.type
-
-
-Cadaveric Living <NA>
- 5148 4627 0
-
-
-
-
+
Cadaveric Living
@@ -2555,7 +2428,7 @@ Cadaveric Living
-
+
@@ -2574,7 +2447,7 @@ Cadaveric Living
-
+
@@ -2583,7 +2456,7 @@ Cadaveric Living
-
+
@@ -2592,7 +2465,7 @@ Cadaveric Living
-
+
@@ -2605,7 +2478,7 @@ Cadaveric Living
-
+
@@ -2618,79 +2491,50 @@ Cadaveric Living
-
+
-
- Kaplan-Meier
-
- Overall (L)
-
-
-
Call: survfit(formula = Surv(follow.up, death) ~ 1, data = dat)
-
- time n.risk n.event survival std.err lower 95% CI upper 95% CI
- 4 4150 359 0.953 0.00252 0.948 0.958
- 8 1197 86 0.919 0.00449 0.910 0.928
- 12 48 20 0.888 0.00867 0.871 0.905
-
-
+
+ Overall Kaplan-Meier
-
+
-
- tx.type (M)
-
-
-
-
-
-
-
-
-
-
-
-
- Cox Model
+
+ Hazard Functions
death Distribution (?)
-
+
+
+
+
+
+
+
+
+
+
-
- Overall (L)
-
-
-
-
skip overall, jump directly to tx.type
-
-
+
+ Overall (?)
# A tibble: 7 × 5
@@ -2706,17 +2550,20 @@ Note
-
- tx.type (M)
+
-
-
- age (categorical) (M)
+
+ Cox Model
+
-
- Full model (L)
+
+ age (categorical) (M)
+
+
+ Full model (L)
Call:
@@ -2807,12 +2654,12 @@ Score (logrank) test = 69.92 on 13 df, p=8e-10
-
- Discussion: include year or not?
+
+ Discussion: include year or not?
-
diff --git a/slides.qmd b/slides.qmd
new file mode 100644
index 0000000..81b33bb
--- /dev/null
+++ b/slides.qmd
@@ -0,0 +1,609 @@
+---
+title: Slides
+execute:
+ cache: true
+ freeze: auto
+ include: true
+ echo: false
+number-sections: true
+---
+
+```{r}
+library(tidyverse)
+library(dplyr)
+library(ggplot2)
+library(survival)
+library(emmeans)
+library(foreign)
+library(gtsummary)
+library(gt)
+library(ggsurvfit)
+```
+
+```{r}
+dat <- read.csv("./unos.txt", sep = "\t")
+names(dat) <- c("hla.match", "age.donor", "age.rec", "cold.isc", "death",
+ "year", "sex", "tx.type", "follow.up")
+dat <- dat |>
+ mutate(
+ sex = factor(sex, levels = c(0,1), labels = c("Female","Male")),
+ tx.type = factor(tx.type, levels = c(0,1), labels = c("Cadaveric","Living")),
+ hla.match = factor(hla.match),
+ year = factor(year)
+ )
+```
+
+# Introduction: research question
+
+## Survival after transplantation (M)
+
+## Identify Predictors (M)
+
+## Why Survival Analysis (L)
+
+Motivation: study the distribution of time to event $T$.
+
+Example: time of death after kidney transplant.
+
+
+```{r}
+ex <- data.frame(
+ id = c(1, 2, 3, 4, 5),
+ transplant = c(2000, 2000, 2001, 2003, 2004),
+ death = c(2005, 2009, 2005, 2004, 2016)
+)
+
+ex_table_1 <- ex |>
+ gt() |>
+ cols_label(
+ id = "ID",
+ transplant = "Transplant",
+ death = "Death"
+ ) %>%
+ cols_align(align = "center", columns = everything())
+
+ex_plot_real_time <- ggplot(ex) +
+ geom_segment(aes(x = transplant, xend = death, y = factor(id),
+ yend = factor(id)), color = "grey50", linewidth = 1) +
+ geom_point(aes(x = transplant, y = factor(id), color = "trans"), size = 3) +
+ geom_point(aes(x = death, y = factor(id), color = "death"), size = 3) +
+ scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D")) +
+ scale_y_discrete(limits = rev) +
+ labs(x = "Year", y = "Subject ID", color = "Event") +
+ theme_minimal()
+```
+
+```{r}
+ex_table_1
+ex_plot_real_time
+```
+
+---
+
+We then calculate time to death for each respondents.
+
+With this, all we need is to fit a linear model `t ~ X` or `log(t) ~ X`
+
+```{r}
+ex_t <- ex |>
+ mutate(
+ t = death - transplant
+ )
+
+ex_table_2 <- ex_t |>
+ gt() |>
+ cols_label(
+ id = "ID",
+ transplant = "Transplant",
+ death = "Death",
+ t = "Time to death"
+ ) |>
+ cols_align(align = "center", columns = everything())
+
+ex_plot_uniform_time <- ggplot(ex_t) +
+ geom_segment(aes(x = 0, xend = t, y = factor(id),
+ yend = factor(id)), color = "grey50", linewidth = 1) +
+ geom_point(aes(x = 0, y = factor(id), color = "trans"), size = 3) +
+ geom_point(aes(x = t, y = factor(id), color = "death"), size = 3) +
+ scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D")) +
+ scale_y_discrete(limits = rev) +
+ labs(x = "Year", y = "Subject ID", color = "Event") +
+ theme_minimal()
+
+```
+
+
+```{r}
+ex_table_2
+ex_plot_uniform_time
+```
+
+---
+
+What if we do not know when exactly some respondents die?
+
+Scenario 1: the study ends at the year 2008?
+
+```{r}
+ex_t_c <- ex_t |>
+ mutate(
+ death_censored = if_else(death <= 2008, death, 2008),
+ death_censored_txt = if_else(death <= 2008, as.character(death), "> 2008"),
+ status = if_else(death <= 2008, 1, 0),
+ t_1 = death_censored - transplant,
+ t_1_txt = if_else(death <= 2008, as.character(t_1),
+ paste(">", as.character(t_1)))
+ )
+```
+
+```{r}
+ex_table_3 <- ex_t_c |>
+ select(id, transplant, death_censored_txt, t_1_txt) |>
+ gt() |>
+ cols_label(
+ id = "ID",
+ transplant = "Transplant",
+ death_censored_txt = "Death",
+ t_1_txt = "Time to death"
+ ) |>
+ cols_align(align = "center", columns = everything())
+
+ex_plot_real_time_1 <- ggplot(ex_t_c) +
+ geom_segment(aes(x = transplant, xend = death_censored, y = factor(id),
+ yend = factor(id)), color = "grey50", linewidth = 1) +
+ geom_point(aes(x = transplant, y = factor(id), color = "trans"), size = 3) +
+ geom_point(aes(x = death_censored, y = factor(id), color =
+ if_else(status == 1, "death", "censored")), size = 3) +
+ scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D",
+ "censored" = "orange")) +
+ scale_y_discrete(limits = rev) +
+ labs(x = "Year", y = "Subject ID", color = "Event") +
+ xlim(2000, 2016) +
+ geom_vline(xintercept = 2008, linetype = "dashed", color = "orange",
+ linewidth = 0.8) +
+ theme_minimal()
+
+
+ex_plot_uniform_time_1 <- ggplot(ex_t_c) +
+ geom_segment(aes(x = 0, xend = t_1, y = factor(id),
+ yend = factor(id)), color = "grey50", linewidth = 1) +
+ geom_point(aes(x = 0, y = factor(id), color = "trans"), size = 3) +
+ geom_point(aes(x = t_1, y = factor(id), color =
+ if_else(status == 1, "death", "censored")), size = 3) +
+ scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D",
+ "censored" = "orange")) +
+ scale_y_discrete(limits = rev) +
+ labs(x = "Year", y = "Subject ID", color = "Event") +
+ theme_minimal()
+```
+
+
+```{r}
+ex_table_3
+ex_plot_real_time_1
+ex_plot_uniform_time_1
+```
+
+**Right Censoring**: only observe the event (death) if it occurs before a
+certain time (2008).
+
+---
+
+Scenario 2: respondent 3 move away; loss follow up
+
+```{r}
+ex_alt <- ex_t_c
+ex_alt$death_censored_txt[3] <- "> 2005"
+ex_alt$status[3] <- 0
+ex_alt$t_1_txt[3] <- "> 4"
+
+
+ex_table_4 <- ex_alt |>
+ select(id, transplant, death_censored_txt, t_1_txt) |>
+ gt() |>
+ cols_label(
+ id = "ID",
+ transplant = "Transplant",
+ death_censored_txt = "Death",
+ t_1_txt = "Time to death"
+ ) |>
+ cols_align(align = "center", columns = everything())
+
+ex_plot_real_time_2 <- ggplot(ex_alt) +
+ geom_segment(aes(x = transplant, xend = death_censored, y = factor(id),
+ yend = factor(id)), color = "grey50", linewidth = 1) +
+ geom_point(aes(x = transplant, y = factor(id), color = "trans"), size = 3) +
+ geom_point(aes(x = death_censored, y = factor(id), color =
+ if_else(status == 1, "death", "censored")), size = 3) +
+ scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D",
+ "censored" = "orange")) +
+ scale_y_discrete(limits = rev) +
+ labs(x = "Year", y = "Subject ID", color = "Event") +
+ xlim(2000, 2016) +
+ geom_vline(xintercept = 2008, linetype = "dashed", color = "orange",
+ linewidth = 0.8) +
+ theme_minimal()
+
+
+ex_plot_uniform_time_2 <- ggplot(ex_alt) +
+ geom_segment(aes(x = 0, xend = t_1, y = factor(id),
+ yend = factor(id)), color = "grey50", linewidth = 1) +
+ geom_point(aes(x = 0, y = factor(id), color = "trans"), size = 3) +
+ geom_point(aes(x = t_1, y = factor(id), color =
+ if_else(status == 1, "death", "censored")), size = 3) +
+ scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D",
+ "censored" = "orange")) +
+ scale_y_discrete(limits = rev) +
+ labs(x = "Year", y = "Subject ID", color = "Event") +
+ theme_minimal()
+```
+
+```{r}
+ex_table_4
+ex_plot_real_time_2
+ex_plot_uniform_time_2
+```
+
+---
+
+How many patients are right censored?
+
+```{r}
+dat |>
+ mutate(Overall = "Overall") |>
+ pivot_longer(
+ cols = c(Overall, sex, tx.type),
+ names_to = "Attribute",
+ values_to = "Category"
+ ) |>
+ count(Attribute, Category, death) |>
+ ggplot(aes(x = Category, y = n, fill = factor(death))) +
+ geom_col(position = "dodge") +
+ facet_wrap(~ Attribute, scales = "free_x") +
+ labs(x = "Group", y = "Count", fill = "Death Status") +
+ theme_minimal()
+```
+
+## Challenges in Survival Analysis (L)
+
+- **Right Censoring**: only observe the event if it occurs before a certain
+ time.
+- **Left Censoring**: event has occurred prior to the start of a research
+ - Follow up every 3 years.
+ - Event has occurred -> event happened sometime before follow up.
+- **Left Truncation**: delayed entry; respondents are included only if they
+ survived long enough.
+ - Start follow up with patients 100 days after the transplant
+ - Patients dies within 100 day wouldn't be included in the dataset
+- **Right Truncation**: respondents are included only if they have already
+ experienced the event.
+ - Retrospective analysis from deceased patients between 1990 and 2000.
+ - Patients not dead before 2000 are not included in the dataset.
+
+# Method & Result
+
+## Explain Dataset (M)
+
+## Table 1 (L)
+
+
+### HLA match `hla.match`
+
+```{r}
+dat$hla.match |> table(useNA = "always")
+```
+
+```{r}
+ggplot(dat, aes(x = hla.match)) +
+ geom_bar() +
+ labs(x = "HLA match", y = "Count") +
+ theme_minimal()
+
+```
+
+```{r}
+ggplot(dat, aes(x = hla.match, fill = tx.type)) +
+ geom_bar(position = "dodge") +
+ labs(x = "HLA match", y = "Count") +
+ theme_minimal()
+```
+
+### Donor age `age.donor`
+
+```{r}
+#| echo: true
+mean(dat$age.donor, na.rm = TRUE)
+median(dat$age.donor, na.rm = TRUE)
+```
+
+```{r}
+ggplot(dat, aes(x = age.donor)) +
+ geom_bar() +
+ labs(x = "Donor Age", y = "Count") +
+ theme_minimal()
+```
+
+```{r}
+#| fig-width: 8
+#| fig-height: 12
+ggplot(dat, aes(x = age.donor)) +
+ geom_bar() +
+ labs(x = "Donor Age", y = "Count") +
+ theme_minimal() +
+ facet_grid(hla.match ~ .)
+```
+
+```{r}
+ggplot(dat, aes(x = age.donor, color = hla.match)) +
+ geom_density() +
+ labs(x = "Donor Age", y = "Count") +
+ theme_minimal()
+```
+
+
+### Recipient Age `age.rec`
+
+```{r}
+#| echo: true
+mean(dat$age.rec, na.rm = TRUE)
+median(dat$age.rec, na.rm = TRUE)
+```
+
+
+```{r}
+ggplot(dat, aes(x = age.rec)) +
+ geom_bar() +
+ labs(x = "Recipient Age", y = "Count") +
+ theme_minimal()
+
+```
+
+```{r}
+ggplot(dat, aes(x = as.factor(age.rec), y = age.donor)) +
+ geom_boxplot() +
+ theme_minimal()
+
+```
+
+```{r}
+ggplot(dat, aes(x = age.rec, fill = tx.type)) +
+ geom_bar(position = "dodge") +
+ theme_minimal()
+
+```
+
+```{r}
+ggplot(dat, aes(x = age.rec, color = hla.match)) +
+ geom_density() +
+ theme_minimal()
+```
+
+```{r}
+ggplot(dat, aes(x = as.factor(age.rec), y = cold.isc)) +
+ geom_boxplot() +
+ theme_minimal()
+```
+
+
+### Cold Ischemia Time `cold.isc`
+
+```{r}
+#| echo: true
+mean(dat$cold.isc, na.rm = TRUE)
+median(dat$cold.isc, na.rm = TRUE)
+```
+
+```{r}
+ggplot(dat, aes(x = cold.isc)) +
+ geom_density() +
+ labs(x = "Cold Ischemia Time (hours)", y = "Probability Density") +
+ theme_minimal()
+```
+
+```{r}
+ggplot(dat, aes(x = cold.isc, color = tx.type, group = tx.type)) +
+ geom_density() +
+ labs(x = "Cold Ischemia Time (hours)", y = "Probability Density") +
+ theme_minimal()
+```
+
+### Transplant Type `tx.type`
+
+```{r}
+#| echo: true
+dat$tx.type |> table()
+```
+
+```{r}
+ggplot(dat, aes(x = tx.type)) +
+ geom_bar() +
+ labs(x = "Transplant Type", y = "Count") +
+ theme_minimal()
+
+```
+
+### Year `year`
+
+```{r}
+dat$year |> table()
+```
+
+```{r}
+ggplot(dat, aes(x = year)) +
+ geom_bar() +
+ labs(x = "Year", y = "Count") +
+ theme_minimal()
+```
+
+```{r}
+ggplot(dat, aes(x = year, y = follow.up)) +
+ geom_boxplot()
+```
+
+```{r}
+ggplot(dat, aes(x = year, fill = tx.type)) +
+ geom_bar(position = "dodge")
+```
+
+```{r}
+ggplot(dat, aes(x = year, y = age.rec)) +
+ geom_boxplot()
+```
+
+```{r}
+ggplot(dat, aes(x = year, y = age.donor)) +
+ geom_boxplot()
+```
+
+## Overall Kaplan-Meier
+
+```{r}
+km_all <- survfit(Surv(follow.up, death) ~ 1, data = dat)
+```
+
+```{r}
+km_all |>
+ ggsurvfit(type = "survival") +
+ add_confidence_interval() +
+ scale_y_continuous(limits = c(0, 1), labels = scales::label_percent()) +
+ labs(
+ x = "Years of Follow-up",
+ y = "Overall Survival Probability"
+ ) +
+ theme_minimal()
+```
+
+## Hazard Functions
+
+```{r}
+get.life.table <- function(dat, time.intervals) {
+ n.pop <- nrow(dat)
+
+ dat |>
+ recode.dat(time.intervals) |>
+ group_by(fu.interval) |>
+ summarize(
+ n.censored = sum(.data$death == 0),
+ n.event = sum(.data$death),
+ ) |>
+ ungroup() |>
+ calculate.hazard(n.pop)
+}
+
+get.life.table.by.groups <- function(dat, time.intervals, grps) {
+ grps |>
+ lapply(function(grp) {
+ dat |>
+ get.life.table.by.group(time.intervals, grp) |>
+ mutate(
+ grp.name = grp,
+ grp.value = pick(1)[[1]]
+ ) |>
+ select(-1)
+ }) |>
+ bind_rows()
+}
+
+get.life.table.by.group <- function(dat, time.intervals, grp) {
+ dat |>
+ recode.dat(time.intervals) |>
+ group_by(fu.interval, .data[[grp]]) |>
+ summarize(
+ n.censored = sum(.data$death == 0),
+ n.event = sum(.data$death),
+ .groups = "keep"
+ ) |>
+ ungroup(fu.interval) |>
+ group_modify(function(df.sub, grp) {
+ grp.name <- names(grp)
+ grp.value <- grp[[1]]
+ n.pop <- (dat[[grp.name]] == grp.value) |> sum()
+ calculate.hazard(df.sub, n.pop)
+ }) |>
+ ungroup()
+}
+
+calculate.hazard <- function(life.table, n.pop) {
+ n.removed <- life.table$n.event + life.table$n.censored
+ n.removed.accum <- c(0, cumsum(n.removed)[-length(n.removed)])
+ life.table |>
+ mutate(
+ n.at.risk = n.pop - n.removed.accum,
+ # TODO: how to account for censored? How do we adjust for uneven interval?
+ hazard.rate = n.event / n.at.risk
+ )
+}
+
+recode.dat <- function(dat, time.intervals) {
+ df <- dat[dat$follow.up <= sum(time.intervals), ]
+ time.points <- cumsum(time.intervals)
+ df$fu.interval <- sapply(df$follow.up, function(time) {
+ time.points[time <= time.points][1]
+ })
+
+ df
+}
+```
+
+### `death` Distribution (?)
+
+```{r}
+dat |>
+ mutate(
+ accum.death = cumsum(death),
+ accum.censored = if_else(death == 1, 0, 1) |> cumsum()
+ ) |>
+ ggplot() +
+ geom_step(aes(x = follow.up, y = accum.death))
+```
+
+```{r}
+
+```
+
+```{r}
+plot.death = dat[dat$death == 1,] |>
+ ggplot(aes(x = follow.up)) +
+ geom_histogram(bins = 50)
+plot.death
+```
+
+
+### Overall (?)
+
+```{r}
+time.intervals <- c(1/3, 1/3, 1/3, 1, 1, 1, 1)
+get.life.table(dat, time.intervals)
+```
+
+### `tx.type` (M)
+
+
+```{r}
+
+```
+
+
+## Cox Model
+
+### `age` (continuous) (L)
+
+### `age` (categorical) (M)
+
+### Full model (L)
+
+```{r}
+m1 <- coxph(Surv(follow.up, death) ~ hla.match + tx.type, data = dat)
+summary(m1)
+
+m2 <- coxph(Surv(follow.up, death) ~ hla.match * tx.type, data = dat)
+summary(m2)
+
+anova(m1, m2, test = "LRT")
+```
+
+### Discussion: include `year` or not?
+
+## Assumption Testing
+
+# Conclusion (M + L)