some changes
This commit is contained in:
parent
bce631d9e0
commit
a23e66a9ad
4 changed files with 6565 additions and 757 deletions
1302
report.Rmd
1302
report.Rmd
File diff suppressed because it is too large
Load diff
2336
report.html
2336
report.html
File diff suppressed because it is too large
Load diff
3075
slides.html
Normal file
3075
slides.html
Normal file
File diff suppressed because it is too large
Load diff
609
slides.qmd
Normal file
609
slides.qmd
Normal file
|
|
@ -0,0 +1,609 @@
|
|||
---
|
||||
title: Slides
|
||||
execute:
|
||||
cache: true
|
||||
freeze: auto
|
||||
include: true
|
||||
echo: false
|
||||
number-sections: true
|
||||
---
|
||||
|
||||
```{r}
|
||||
library(tidyverse)
|
||||
library(dplyr)
|
||||
library(ggplot2)
|
||||
library(survival)
|
||||
library(emmeans)
|
||||
library(foreign)
|
||||
library(gtsummary)
|
||||
library(gt)
|
||||
library(ggsurvfit)
|
||||
```
|
||||
|
||||
```{r}
|
||||
dat <- read.csv("./unos.txt", sep = "\t")
|
||||
names(dat) <- c("hla.match", "age.donor", "age.rec", "cold.isc", "death",
|
||||
"year", "sex", "tx.type", "follow.up")
|
||||
dat <- dat |>
|
||||
mutate(
|
||||
sex = factor(sex, levels = c(0,1), labels = c("Female","Male")),
|
||||
tx.type = factor(tx.type, levels = c(0,1), labels = c("Cadaveric","Living")),
|
||||
hla.match = factor(hla.match),
|
||||
year = factor(year)
|
||||
)
|
||||
```
|
||||
|
||||
# Introduction: research question
|
||||
|
||||
## Survival after transplantation (M)
|
||||
|
||||
## Identify Predictors (M)
|
||||
|
||||
## Why Survival Analysis (L)
|
||||
|
||||
Motivation: study the distribution of time to event $T$.
|
||||
|
||||
Example: time of death after kidney transplant.
|
||||
|
||||
|
||||
```{r}
|
||||
ex <- data.frame(
|
||||
id = c(1, 2, 3, 4, 5),
|
||||
transplant = c(2000, 2000, 2001, 2003, 2004),
|
||||
death = c(2005, 2009, 2005, 2004, 2016)
|
||||
)
|
||||
|
||||
ex_table_1 <- ex |>
|
||||
gt() |>
|
||||
cols_label(
|
||||
id = "ID",
|
||||
transplant = "Transplant",
|
||||
death = "Death"
|
||||
) %>%
|
||||
cols_align(align = "center", columns = everything())
|
||||
|
||||
ex_plot_real_time <- ggplot(ex) +
|
||||
geom_segment(aes(x = transplant, xend = death, y = factor(id),
|
||||
yend = factor(id)), color = "grey50", linewidth = 1) +
|
||||
geom_point(aes(x = transplant, y = factor(id), color = "trans"), size = 3) +
|
||||
geom_point(aes(x = death, y = factor(id), color = "death"), size = 3) +
|
||||
scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D")) +
|
||||
scale_y_discrete(limits = rev) +
|
||||
labs(x = "Year", y = "Subject ID", color = "Event") +
|
||||
theme_minimal()
|
||||
```
|
||||
|
||||
```{r}
|
||||
ex_table_1
|
||||
ex_plot_real_time
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
We then calculate time to death for each respondents.
|
||||
|
||||
With this, all we need is to fit a linear model `t ~ X` or `log(t) ~ X`
|
||||
|
||||
```{r}
|
||||
ex_t <- ex |>
|
||||
mutate(
|
||||
t = death - transplant
|
||||
)
|
||||
|
||||
ex_table_2 <- ex_t |>
|
||||
gt() |>
|
||||
cols_label(
|
||||
id = "ID",
|
||||
transplant = "Transplant",
|
||||
death = "Death",
|
||||
t = "Time to death"
|
||||
) |>
|
||||
cols_align(align = "center", columns = everything())
|
||||
|
||||
ex_plot_uniform_time <- ggplot(ex_t) +
|
||||
geom_segment(aes(x = 0, xend = t, y = factor(id),
|
||||
yend = factor(id)), color = "grey50", linewidth = 1) +
|
||||
geom_point(aes(x = 0, y = factor(id), color = "trans"), size = 3) +
|
||||
geom_point(aes(x = t, y = factor(id), color = "death"), size = 3) +
|
||||
scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D")) +
|
||||
scale_y_discrete(limits = rev) +
|
||||
labs(x = "Year", y = "Subject ID", color = "Event") +
|
||||
theme_minimal()
|
||||
|
||||
```
|
||||
|
||||
|
||||
```{r}
|
||||
ex_table_2
|
||||
ex_plot_uniform_time
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
What if we do not know when exactly some respondents die?
|
||||
|
||||
Scenario 1: the study ends at the year 2008?
|
||||
|
||||
```{r}
|
||||
ex_t_c <- ex_t |>
|
||||
mutate(
|
||||
death_censored = if_else(death <= 2008, death, 2008),
|
||||
death_censored_txt = if_else(death <= 2008, as.character(death), "> 2008"),
|
||||
status = if_else(death <= 2008, 1, 0),
|
||||
t_1 = death_censored - transplant,
|
||||
t_1_txt = if_else(death <= 2008, as.character(t_1),
|
||||
paste(">", as.character(t_1)))
|
||||
)
|
||||
```
|
||||
|
||||
```{r}
|
||||
ex_table_3 <- ex_t_c |>
|
||||
select(id, transplant, death_censored_txt, t_1_txt) |>
|
||||
gt() |>
|
||||
cols_label(
|
||||
id = "ID",
|
||||
transplant = "Transplant",
|
||||
death_censored_txt = "Death",
|
||||
t_1_txt = "Time to death"
|
||||
) |>
|
||||
cols_align(align = "center", columns = everything())
|
||||
|
||||
ex_plot_real_time_1 <- ggplot(ex_t_c) +
|
||||
geom_segment(aes(x = transplant, xend = death_censored, y = factor(id),
|
||||
yend = factor(id)), color = "grey50", linewidth = 1) +
|
||||
geom_point(aes(x = transplant, y = factor(id), color = "trans"), size = 3) +
|
||||
geom_point(aes(x = death_censored, y = factor(id), color =
|
||||
if_else(status == 1, "death", "censored")), size = 3) +
|
||||
scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D",
|
||||
"censored" = "orange")) +
|
||||
scale_y_discrete(limits = rev) +
|
||||
labs(x = "Year", y = "Subject ID", color = "Event") +
|
||||
xlim(2000, 2016) +
|
||||
geom_vline(xintercept = 2008, linetype = "dashed", color = "orange",
|
||||
linewidth = 0.8) +
|
||||
theme_minimal()
|
||||
|
||||
|
||||
ex_plot_uniform_time_1 <- ggplot(ex_t_c) +
|
||||
geom_segment(aes(x = 0, xend = t_1, y = factor(id),
|
||||
yend = factor(id)), color = "grey50", linewidth = 1) +
|
||||
geom_point(aes(x = 0, y = factor(id), color = "trans"), size = 3) +
|
||||
geom_point(aes(x = t_1, y = factor(id), color =
|
||||
if_else(status == 1, "death", "censored")), size = 3) +
|
||||
scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D",
|
||||
"censored" = "orange")) +
|
||||
scale_y_discrete(limits = rev) +
|
||||
labs(x = "Year", y = "Subject ID", color = "Event") +
|
||||
theme_minimal()
|
||||
```
|
||||
|
||||
|
||||
```{r}
|
||||
ex_table_3
|
||||
ex_plot_real_time_1
|
||||
ex_plot_uniform_time_1
|
||||
```
|
||||
|
||||
**Right Censoring**: only observe the event (death) if it occurs before a
|
||||
certain time (2008).
|
||||
|
||||
---
|
||||
|
||||
Scenario 2: respondent 3 move away; loss follow up
|
||||
|
||||
```{r}
|
||||
ex_alt <- ex_t_c
|
||||
ex_alt$death_censored_txt[3] <- "> 2005"
|
||||
ex_alt$status[3] <- 0
|
||||
ex_alt$t_1_txt[3] <- "> 4"
|
||||
|
||||
|
||||
ex_table_4 <- ex_alt |>
|
||||
select(id, transplant, death_censored_txt, t_1_txt) |>
|
||||
gt() |>
|
||||
cols_label(
|
||||
id = "ID",
|
||||
transplant = "Transplant",
|
||||
death_censored_txt = "Death",
|
||||
t_1_txt = "Time to death"
|
||||
) |>
|
||||
cols_align(align = "center", columns = everything())
|
||||
|
||||
ex_plot_real_time_2 <- ggplot(ex_alt) +
|
||||
geom_segment(aes(x = transplant, xend = death_censored, y = factor(id),
|
||||
yend = factor(id)), color = "grey50", linewidth = 1) +
|
||||
geom_point(aes(x = transplant, y = factor(id), color = "trans"), size = 3) +
|
||||
geom_point(aes(x = death_censored, y = factor(id), color =
|
||||
if_else(status == 1, "death", "censored")), size = 3) +
|
||||
scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D",
|
||||
"censored" = "orange")) +
|
||||
scale_y_discrete(limits = rev) +
|
||||
labs(x = "Year", y = "Subject ID", color = "Event") +
|
||||
xlim(2000, 2016) +
|
||||
geom_vline(xintercept = 2008, linetype = "dashed", color = "orange",
|
||||
linewidth = 0.8) +
|
||||
theme_minimal()
|
||||
|
||||
|
||||
ex_plot_uniform_time_2 <- ggplot(ex_alt) +
|
||||
geom_segment(aes(x = 0, xend = t_1, y = factor(id),
|
||||
yend = factor(id)), color = "grey50", linewidth = 1) +
|
||||
geom_point(aes(x = 0, y = factor(id), color = "trans"), size = 3) +
|
||||
geom_point(aes(x = t_1, y = factor(id), color =
|
||||
if_else(status == 1, "death", "censored")), size = 3) +
|
||||
scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D",
|
||||
"censored" = "orange")) +
|
||||
scale_y_discrete(limits = rev) +
|
||||
labs(x = "Year", y = "Subject ID", color = "Event") +
|
||||
theme_minimal()
|
||||
```
|
||||
|
||||
```{r}
|
||||
ex_table_4
|
||||
ex_plot_real_time_2
|
||||
ex_plot_uniform_time_2
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
How many patients are right censored?
|
||||
|
||||
```{r}
|
||||
dat |>
|
||||
mutate(Overall = "Overall") |>
|
||||
pivot_longer(
|
||||
cols = c(Overall, sex, tx.type),
|
||||
names_to = "Attribute",
|
||||
values_to = "Category"
|
||||
) |>
|
||||
count(Attribute, Category, death) |>
|
||||
ggplot(aes(x = Category, y = n, fill = factor(death))) +
|
||||
geom_col(position = "dodge") +
|
||||
facet_wrap(~ Attribute, scales = "free_x") +
|
||||
labs(x = "Group", y = "Count", fill = "Death Status") +
|
||||
theme_minimal()
|
||||
```
|
||||
|
||||
## Challenges in Survival Analysis (L)
|
||||
|
||||
- **Right Censoring**: only observe the event if it occurs before a certain
|
||||
time.
|
||||
- **Left Censoring**: event has occurred prior to the start of a research
|
||||
- Follow up every 3 years.
|
||||
- Event has occurred -> event happened sometime before follow up.
|
||||
- **Left Truncation**: delayed entry; respondents are included only if they
|
||||
survived long enough.
|
||||
- Start follow up with patients 100 days after the transplant
|
||||
- Patients dies within 100 day wouldn't be included in the dataset
|
||||
- **Right Truncation**: respondents are included only if they have already
|
||||
experienced the event.
|
||||
- Retrospective analysis from deceased patients between 1990 and 2000.
|
||||
- Patients not dead before 2000 are not included in the dataset.
|
||||
|
||||
# Method & Result
|
||||
|
||||
## Explain Dataset (M)
|
||||
|
||||
## Table 1 (L)
|
||||
|
||||
|
||||
### HLA match `hla.match`
|
||||
|
||||
```{r}
|
||||
dat$hla.match |> table(useNA = "always")
|
||||
```
|
||||
|
||||
```{r}
|
||||
ggplot(dat, aes(x = hla.match)) +
|
||||
geom_bar() +
|
||||
labs(x = "HLA match", y = "Count") +
|
||||
theme_minimal()
|
||||
|
||||
```
|
||||
|
||||
```{r}
|
||||
ggplot(dat, aes(x = hla.match, fill = tx.type)) +
|
||||
geom_bar(position = "dodge") +
|
||||
labs(x = "HLA match", y = "Count") +
|
||||
theme_minimal()
|
||||
```
|
||||
|
||||
### Donor age `age.donor`
|
||||
|
||||
```{r}
|
||||
#| echo: true
|
||||
mean(dat$age.donor, na.rm = TRUE)
|
||||
median(dat$age.donor, na.rm = TRUE)
|
||||
```
|
||||
|
||||
```{r}
|
||||
ggplot(dat, aes(x = age.donor)) +
|
||||
geom_bar() +
|
||||
labs(x = "Donor Age", y = "Count") +
|
||||
theme_minimal()
|
||||
```
|
||||
|
||||
```{r}
|
||||
#| fig-width: 8
|
||||
#| fig-height: 12
|
||||
ggplot(dat, aes(x = age.donor)) +
|
||||
geom_bar() +
|
||||
labs(x = "Donor Age", y = "Count") +
|
||||
theme_minimal() +
|
||||
facet_grid(hla.match ~ .)
|
||||
```
|
||||
|
||||
```{r}
|
||||
ggplot(dat, aes(x = age.donor, color = hla.match)) +
|
||||
geom_density() +
|
||||
labs(x = "Donor Age", y = "Count") +
|
||||
theme_minimal()
|
||||
```
|
||||
|
||||
|
||||
### Recipient Age `age.rec`
|
||||
|
||||
```{r}
|
||||
#| echo: true
|
||||
mean(dat$age.rec, na.rm = TRUE)
|
||||
median(dat$age.rec, na.rm = TRUE)
|
||||
```
|
||||
|
||||
|
||||
```{r}
|
||||
ggplot(dat, aes(x = age.rec)) +
|
||||
geom_bar() +
|
||||
labs(x = "Recipient Age", y = "Count") +
|
||||
theme_minimal()
|
||||
|
||||
```
|
||||
|
||||
```{r}
|
||||
ggplot(dat, aes(x = as.factor(age.rec), y = age.donor)) +
|
||||
geom_boxplot() +
|
||||
theme_minimal()
|
||||
|
||||
```
|
||||
|
||||
```{r}
|
||||
ggplot(dat, aes(x = age.rec, fill = tx.type)) +
|
||||
geom_bar(position = "dodge") +
|
||||
theme_minimal()
|
||||
|
||||
```
|
||||
|
||||
```{r}
|
||||
ggplot(dat, aes(x = age.rec, color = hla.match)) +
|
||||
geom_density() +
|
||||
theme_minimal()
|
||||
```
|
||||
|
||||
```{r}
|
||||
ggplot(dat, aes(x = as.factor(age.rec), y = cold.isc)) +
|
||||
geom_boxplot() +
|
||||
theme_minimal()
|
||||
```
|
||||
|
||||
|
||||
### Cold Ischemia Time `cold.isc`
|
||||
|
||||
```{r}
|
||||
#| echo: true
|
||||
mean(dat$cold.isc, na.rm = TRUE)
|
||||
median(dat$cold.isc, na.rm = TRUE)
|
||||
```
|
||||
|
||||
```{r}
|
||||
ggplot(dat, aes(x = cold.isc)) +
|
||||
geom_density() +
|
||||
labs(x = "Cold Ischemia Time (hours)", y = "Probability Density") +
|
||||
theme_minimal()
|
||||
```
|
||||
|
||||
```{r}
|
||||
ggplot(dat, aes(x = cold.isc, color = tx.type, group = tx.type)) +
|
||||
geom_density() +
|
||||
labs(x = "Cold Ischemia Time (hours)", y = "Probability Density") +
|
||||
theme_minimal()
|
||||
```
|
||||
|
||||
### Transplant Type `tx.type`
|
||||
|
||||
```{r}
|
||||
#| echo: true
|
||||
dat$tx.type |> table()
|
||||
```
|
||||
|
||||
```{r}
|
||||
ggplot(dat, aes(x = tx.type)) +
|
||||
geom_bar() +
|
||||
labs(x = "Transplant Type", y = "Count") +
|
||||
theme_minimal()
|
||||
|
||||
```
|
||||
|
||||
### Year `year`
|
||||
|
||||
```{r}
|
||||
dat$year |> table()
|
||||
```
|
||||
|
||||
```{r}
|
||||
ggplot(dat, aes(x = year)) +
|
||||
geom_bar() +
|
||||
labs(x = "Year", y = "Count") +
|
||||
theme_minimal()
|
||||
```
|
||||
|
||||
```{r}
|
||||
ggplot(dat, aes(x = year, y = follow.up)) +
|
||||
geom_boxplot()
|
||||
```
|
||||
|
||||
```{r}
|
||||
ggplot(dat, aes(x = year, fill = tx.type)) +
|
||||
geom_bar(position = "dodge")
|
||||
```
|
||||
|
||||
```{r}
|
||||
ggplot(dat, aes(x = year, y = age.rec)) +
|
||||
geom_boxplot()
|
||||
```
|
||||
|
||||
```{r}
|
||||
ggplot(dat, aes(x = year, y = age.donor)) +
|
||||
geom_boxplot()
|
||||
```
|
||||
|
||||
## Overall Kaplan-Meier
|
||||
|
||||
```{r}
|
||||
km_all <- survfit(Surv(follow.up, death) ~ 1, data = dat)
|
||||
```
|
||||
|
||||
```{r}
|
||||
km_all |>
|
||||
ggsurvfit(type = "survival") +
|
||||
add_confidence_interval() +
|
||||
scale_y_continuous(limits = c(0, 1), labels = scales::label_percent()) +
|
||||
labs(
|
||||
x = "Years of Follow-up",
|
||||
y = "Overall Survival Probability"
|
||||
) +
|
||||
theme_minimal()
|
||||
```
|
||||
|
||||
## Hazard Functions
|
||||
|
||||
```{r}
|
||||
get.life.table <- function(dat, time.intervals) {
|
||||
n.pop <- nrow(dat)
|
||||
|
||||
dat |>
|
||||
recode.dat(time.intervals) |>
|
||||
group_by(fu.interval) |>
|
||||
summarize(
|
||||
n.censored = sum(.data$death == 0),
|
||||
n.event = sum(.data$death),
|
||||
) |>
|
||||
ungroup() |>
|
||||
calculate.hazard(n.pop)
|
||||
}
|
||||
|
||||
get.life.table.by.groups <- function(dat, time.intervals, grps) {
|
||||
grps |>
|
||||
lapply(function(grp) {
|
||||
dat |>
|
||||
get.life.table.by.group(time.intervals, grp) |>
|
||||
mutate(
|
||||
grp.name = grp,
|
||||
grp.value = pick(1)[[1]]
|
||||
) |>
|
||||
select(-1)
|
||||
}) |>
|
||||
bind_rows()
|
||||
}
|
||||
|
||||
get.life.table.by.group <- function(dat, time.intervals, grp) {
|
||||
dat |>
|
||||
recode.dat(time.intervals) |>
|
||||
group_by(fu.interval, .data[[grp]]) |>
|
||||
summarize(
|
||||
n.censored = sum(.data$death == 0),
|
||||
n.event = sum(.data$death),
|
||||
.groups = "keep"
|
||||
) |>
|
||||
ungroup(fu.interval) |>
|
||||
group_modify(function(df.sub, grp) {
|
||||
grp.name <- names(grp)
|
||||
grp.value <- grp[[1]]
|
||||
n.pop <- (dat[[grp.name]] == grp.value) |> sum()
|
||||
calculate.hazard(df.sub, n.pop)
|
||||
}) |>
|
||||
ungroup()
|
||||
}
|
||||
|
||||
calculate.hazard <- function(life.table, n.pop) {
|
||||
n.removed <- life.table$n.event + life.table$n.censored
|
||||
n.removed.accum <- c(0, cumsum(n.removed)[-length(n.removed)])
|
||||
life.table |>
|
||||
mutate(
|
||||
n.at.risk = n.pop - n.removed.accum,
|
||||
# TODO: how to account for censored? How do we adjust for uneven interval?
|
||||
hazard.rate = n.event / n.at.risk
|
||||
)
|
||||
}
|
||||
|
||||
recode.dat <- function(dat, time.intervals) {
|
||||
df <- dat[dat$follow.up <= sum(time.intervals), ]
|
||||
time.points <- cumsum(time.intervals)
|
||||
df$fu.interval <- sapply(df$follow.up, function(time) {
|
||||
time.points[time <= time.points][1]
|
||||
})
|
||||
|
||||
df
|
||||
}
|
||||
```
|
||||
|
||||
### `death` Distribution (?)
|
||||
|
||||
```{r}
|
||||
dat |>
|
||||
mutate(
|
||||
accum.death = cumsum(death),
|
||||
accum.censored = if_else(death == 1, 0, 1) |> cumsum()
|
||||
) |>
|
||||
ggplot() +
|
||||
geom_step(aes(x = follow.up, y = accum.death))
|
||||
```
|
||||
|
||||
```{r}
|
||||
|
||||
```
|
||||
|
||||
```{r}
|
||||
plot.death = dat[dat$death == 1,] |>
|
||||
ggplot(aes(x = follow.up)) +
|
||||
geom_histogram(bins = 50)
|
||||
plot.death
|
||||
```
|
||||
|
||||
|
||||
### Overall (?)
|
||||
|
||||
```{r}
|
||||
time.intervals <- c(1/3, 1/3, 1/3, 1, 1, 1, 1)
|
||||
get.life.table(dat, time.intervals)
|
||||
```
|
||||
|
||||
### `tx.type` (M)
|
||||
|
||||
|
||||
```{r}
|
||||
|
||||
```
|
||||
|
||||
|
||||
## Cox Model
|
||||
|
||||
### `age` (continuous) (L)
|
||||
|
||||
### `age` (categorical) (M)
|
||||
|
||||
### Full model (L)
|
||||
|
||||
```{r}
|
||||
m1 <- coxph(Surv(follow.up, death) ~ hla.match + tx.type, data = dat)
|
||||
summary(m1)
|
||||
|
||||
m2 <- coxph(Surv(follow.up, death) ~ hla.match * tx.type, data = dat)
|
||||
summary(m2)
|
||||
|
||||
anova(m1, m2, test = "LRT")
|
||||
```
|
||||
|
||||
### Discussion: include `year` or not?
|
||||
|
||||
## Assumption Testing
|
||||
|
||||
# Conclusion (M + L)
|
||||
Loading…
Add table
Add a link
Reference in a new issue