This commit is contained in:
Louis Chih-Ming Lee 2026-05-21 04:26:15 +02:00
parent 4e4bb98fa2
commit ae2ac0be0b
325 changed files with 2349 additions and 1756 deletions

View file

@ -1,12 +1,11 @@
---
title: "Slides"
execute:
cache: true
freeze: auto
include: true
echo: false
warning: false
number-sections: true
toc: true
---
```{r}
@ -33,7 +32,7 @@ dat <- dat |>
mutate(
sex = factor(sex, levels = c(0,1), labels = c("Female","Male")),
tx.type = factor(tx.type, levels = c(0,1), labels = c("Cadaveric","Living")),
# hla.match = factor(hla.match),
hla.match = factor(hla.match),
# year = factor(year)
)
```
@ -50,30 +49,26 @@ my.theme <- theme_minimal() +
)
```
::: {.callout-warning}
There are 25 repondents with `follow.up = 0`, 21 of which with `death = 0` and
other 4 with `death = 1`.
```{r, echo=TRUE}
dat[dat$follow.up == 0, ] |>
group_by(death) |>
summarise(n = n())
```{r}
my.tab <- function(x) {
x |>
tab_options(
row.striping.background_color = "#F6E8C3",
table.border.top.color = "#A83262",
table.border.bottom.color = "#A83262",
column_labels.border.bottom.color = "#A83262",
table.font.size = px(16),
heading.title.font.size = px(18),
table.background.color = "#FFF4CC"
)
}
```
Reasons to omit them, that is, have `dat <- dat[dat$follow.up > 0,]` at the
very beginning:
- If `death = 1`, transplant failed. They shouldn't be included in the research
question "survival rate of a *successful* tranplant".
- If `death = 0`, transplant was successful, but they were immediately removed
from populatoin at risk, contributiong nothing to the analysis.
:::
# Title
# Table of Contents (?)
# Table of Contents
# Introduction (M)
# Introduction
# Method & Result
@ -81,8 +76,9 @@ very beginning:
- Data source
- Study population
```{r}
```{r, echo=TRUE}
nrow(dat)
range(dat$year)
```
- Covariates
- Outcome
@ -104,16 +100,42 @@ dat |>
hla.match ~ "HLA matches, n(%)",
age.donor ~ "Donor age, median (IQR)",
age.rec ~ "Recipient age, median (IQR)",
cold.isc ~ "Cold ischemic time (hours), median (IQR), ",
cold.isc ~ "Cold ischemic time (hours), median (IQR)",
sex ~ "Sex, n(%)"
),
missing = "ifany"
) |>
add_overall() |>
modify_footnote(all_stat_cols() ~ NA)
modify_footnote(all_stat_cols() ~ NA) |>
as_gt() |>
tab_style(
style = list(
cell_fill(color = "#A83262"),
cell_text(color = "white", weight = "bold")
),
locations = cells_column_labels()
) |>
tab_style(
style = list(
cell_fill(color = "#F8EFCB"),
cell_text(color = "#2F2F2F")
),
locations = cells_body()
) |>
opt_row_striping(row_striping = TRUE) |>
tab_options(
table.background.color = "#F8EFCB",
row.striping.background_color = "#EFE6D0",
table.border.top.color = "#A83262",
table.border.bottom.color = "#A83262",
column_labels.border.top.color = "#A83262",
column_labels.border.bottom.color = "#A83262",
table.font.size = px(18),
column_labels.font.weight = "bold"
)
```
# Elementary Analysis: Transplant Type (L)
# Table 1: By Transplant Type
## Transplant Type vs Sex
@ -190,16 +212,25 @@ plot.tx.rec <- ggplot(data = dat) +
plot.tx.rec
```
```{r}
dat[dat$tx.type == "Cadaveric", ]$age.rec |> table()
dat[dat$tx.type == "Living", ]$age.rec |> table()
```
# Elementary Analysis: Transplant Type (Cont.)
# Table 1: By Transplant Type
## Transplant Type vs HLA Match
Living donors have less `hla.match` then cadaveric donors.
```{r, fig.width=7, fig.height=5}
ggplot(data = dat) +
geom_bar(aes(x = hla.match)) +
dat |>
mutate(
tx.type = recode(tx.type, "Cadaveric" = "Deceased")
) |>
ggplot() +
geom_bar(aes(x = hla.match, fill = tx.type)) +
facet_wrap(tx.type ~ ., ncol = 1) +
labs(
title = "Distribution of HLA Match by Transplant Type",
@ -207,7 +238,21 @@ ggplot(data = dat) +
y = "Count",
color = NULL
) +
my.theme
scale_fill_manual(
values = c(
"Deceased" = "darkorange",
"Living" = "darkgreen"
),
labels = c(
"Deceased" = "Deceased",
"Living" = "Living"
)
) +
my.theme +
theme(
legend.position = "none",
strip.text = element_text(size = 12)
)
```
@ -242,7 +287,7 @@ plot.tx.donor <- ggplot(data = dat) +
plot.tx.donor
```
# Overall Kaplan-Meier Curve
# Overall Survival
```{r}
km.all <- survfit(Surv(follow.up, death) ~ 1, data = dat)
@ -271,8 +316,8 @@ border_col <- "#D8C8A5"
```
```{r}
times <- seq(0, 12, 2)
summary(km.all, times = times)[c("time", "n.risk", "surv")] |>
km.all.summary <- summary(km.all, times = seq(0, 12, 2))
km.all.summary[c("time", "n.risk", "surv", "lower", "upper")] |>
as.data.frame() |>
gt() |>
tab_header(
@ -281,33 +326,13 @@ summary(km.all, times = times)[c("time", "n.risk", "surv")] |>
cols_label(
time = "Follow-up Time",
n.risk = "Number at Risk",
surv = "Survival Probability"
) |>
fmt_number(
columns = surv,
decimals = 3
) |>
tab_options(
table.font.size = px(15),
heading.title.font.size = px(20),
column_labels.font.weight = "bold"
)
```
```{r}
summary(km.all, times = times)[c("time", "n.risk", "surv")] |>
as.data.frame() |>
gt() |>
tab_header(
title = "Overall Kaplan-Meier Survival Summary"
) |>
cols_label(
time = "Follow-up Time",
n.risk = "Number at Risk",
surv = "Survival Probability"
surv = "Survival Probability",
lower = "Upper 95% CI",
upper = "Lower 95% CI",
) |>
fmt_number(columns = surv, decimals = 3) |>
fmt_number(columns = lower, decimals = 3) |>
fmt_number(columns = upper, decimals = 3) |>
fmt_number(columns = time, decimals = 0) |>
tab_style(
style = list(
@ -321,18 +346,234 @@ summary(km.all, times = times)[c("time", "n.risk", "surv")] |>
locations = cells_body()
) |>
opt_row_striping(row_striping = TRUE) |>
tab_options(
row.striping.background_color = "#F6E8C3",
table.border.top.color = "#A83262",
table.border.bottom.color = "#A83262",
column_labels.border.bottom.color = "#A83262",
table.font.size = px(16),
heading.title.font.size = px(18),
table.background.color = "#FFF4CC"
my.tab()
```
# Hazard by Transplant Type
## Death distribution
```{r}
dat[dat$death == 1, ] |>
mutate(
first.1 = follow.up <= 1,
first.5 = follow.up <= 5,
) |>
select(first.1, first.5) |>
summarize(
total = n(),
first.1.total = sum(first.1),
first.1.ratio = first.1.total / total * 100,
first.5.total = sum(first.5),
first.5.ratio = first.5.total / total * 100,
)
```
# Cox Model (with intervals?)
```{r}
dat[dat$death == 1, ] |>
ggplot(aes(x = follow.up)) +
geom_histogram(bins = 50) +
labs(
title = "Distribution of Occurence of Death",
x = "Years of Follow-up",
y = "Count"
) +
my.theme
```
```{r}
n.pop <- nrow(dat)
dat |>
select(follow.up, death) |>
mutate(censored = ifelse(death == 1, 0, 1)) |>
group_by(follow.up) |>
summarize(
death = sum(death),
censored = sum(censored),
.groups = "drop"
) |>
arrange(follow.up) |>
mutate(
death.accum = cumsum(death),
death.accum.ratio = death.accum / sum(dat$death),
) |>
ggplot() +
geom_step(aes(y = death.accum.ratio, x = follow.up)) +
labs(
title = "Cumulative Proportion of Deaths Over Follow-up",
x = "Years of Follow-up",
y = "Cumulative Proportion of Deaths"
) +
my.theme
```
# Hazard Rate by Transplant Type
```{r}
get.life.table <- function(dat, time.intervals) {
n.pop <- nrow(dat)
dat |>
recode.dat(time.intervals) |>
group_by(fu.interval) |>
summarize(
n.censored = sum(.data$death == 0),
n.event = sum(.data$death),
) |>
ungroup() |>
calculate.hazard(n.pop)
}
get.life.table.by.groups <- function(dat, time.intervals, grps) {
grps |>
lapply(function(grp) {
dat |>
get.life.table.by.group(time.intervals, grp) |>
mutate(
grp.name = grp,
grp.value = pick(1)[[1]]
) |>
select(-1)
}) |>
bind_rows()
}
get.life.table.by.group <- function(dat, time.intervals, grp) {
dat |>
recode.dat(time.intervals) |>
group_by(fu.interval, .data[[grp]]) |>
summarize(
n.censored = sum(.data$death == 0),
n.event = sum(.data$death),
.groups = "keep"
) |>
ungroup(fu.interval) |>
group_modify(function(df.sub, grp) {
grp.name <- names(grp)
grp.value <- grp[[1]]
n.pop <- (dat[[grp.name]] == grp.value) |> sum()
calculate.hazard(df.sub, n.pop)
}) |>
ungroup()
}
calculate.hazard <- function(life.table, n.pop) {
n.removed <- life.table$n.event + life.table$n.censored
n.removed.accum <- c(0, cumsum(n.removed)[-length(n.removed)])
life.table |>
mutate(
n.at.risk = n.pop - n.removed.accum,
# TODO: how to account for censored? How do we adjust for uneven interval?
hazard.rate = n.event / n.at.risk
)
}
recode.dat <- function(dat, time.intervals) {
df <- dat[dat$follow.up <= sum(time.intervals), ]
time.points <- cumsum(time.intervals)
df$fu.interval <- sapply(df$follow.up, function(time) {
time.points[time <= time.points][1]
})
df
}
plot.hazard.rate.by.groups <- function(dat, time.intervals, grps) {
dat |>
get.life.table.by.groups(time.intervals, grps) |>
ggplot(aes(x = fu.interval, y = hazard.rate, color = grp.value)) +
geom_point() +
geom_line() +
scale_color_discrete(
labels = c(
"Cadaveric" = "Deceased",
"Living" = "Living"
)
) +
scale_x_continuous(
breaks = function(x) seq(floor(x[1]), ceiling(x[2]), by = 1)
) +
labs(
title = "Hazard Rate by Transplant Type",
x = "Follow-up Year",
y = "Estimated Hazard Rate",
color = "Transplant Type"
) +
my.theme
}
```
```{r, fig.width=7, fig.height=5}
time.intervals = c(rep(1/3, 3), rep(1, 4))
plot.hazard.rate.by.groups(dat, time.intervals, "tx.type")
```
```{r}
get.life.table.by.group(dat, time.intervals, "tx.type") |>
select(fu.interval, tx.type, hazard.rate) |>
pivot_wider(
names_from = tx.type,
values_from = hazard.rate
) |>
mutate(
fu.interval = case_when(
abs(fu.interval - 1/3) < 0.001 ~ "1/3",
abs(fu.interval - 2/3) < 0.001 ~ "2/3",
fu.interval %% 1 == 0 ~ as.character(as.integer(fu.interval)),
TRUE ~ as.character(fu.interval)
),
Deceased = round(Cadaveric, 3),
Living = round(Living, 3)
) |>
select(fu.interval, Deceased, Living) |>
gt() |>
tab_header(
title = "Estimated Hazard Rate by Transplant Type"
) |>
cols_label(
fu.interval = "Follow-up Interval"
) |>
cols_align(
align = "center",
columns = everything()
) |>
tab_style(
style = list(
cell_fill(color = "#5B5FE0"),
cell_text(color = "white", weight = "bold")
),
locations = cells_column_labels()
) |>
tab_style(
style = list(
cell_fill(color = "#FFF2CC"),
cell_text(color = "#2F2F2F")
),
locations = cells_body()
) |>
tab_style(
style = cell_text(
color = "#5B5FE0",
weight = "bold",
size = px(22)
),
locations = cells_title(groups = "title")
) |>
tab_options(
table.background.color = "#FFF2CC",
row.striping.background_color = "#F6E7B8",
table.border.top.color = "#5B5FE0",
table.border.bottom.color = "#5B5FE0",
column_labels.border.bottom.color = "#5B5FE0",
heading.background.color = "#FFF2CC",
heading.title.font.size = px(22),
table.font.size = px(18),
column_labels.font.weight = "bold"
) |>
opt_row_striping()
```
# Cox Model: Transplant Type
@ -356,11 +597,11 @@ km.tx |>
labels = c("Deceased", "Living")
) +
labs(
title = "Survival by Donor Type",
title = "Survival by Transplant Type",
x = "Years of Follow-up",
y = "Survival Probability",
color = "Donor Type",
fill = "Donor Type"
color = "Transplant Type",
fill = "Transplant Type"
) +
my.theme
```
@ -452,6 +693,37 @@ km.age.rec.group <- survfit(Surv(follow.up, death) ~ age.rec.group,
data = dat.age.rec.group)
```
```{r, fig.width=7, fig.height=7}
eme.age.rec.group$contrast |>
as.data.frame() |>
select(contrast, ratio, SE, p.value) |>
mutate(
contrast = factor(contrast),
log.ratio = log(ratio),
SE.log = SE / ratio,
low = exp(log.ratio - 1.96 * SE.log),
up = exp(log.ratio + 1.96 * SE.log)
) |>
ggplot(aes(y = reorder(contrast, ratio))) +
geom_errorbarh(aes(xmin = low, xmax = up), height = 0.2) +
geom_point(aes(x = ratio)) +
geom_vline(xintercept = 1, linetype = "dashed") +
scale_y_discrete(
labels = function(x) {
gsub("[()]", "", x = x)
}
) +
labs(
x = "Hazard Ratio",
y = "Recipient Age Group Comparison",
title = "Pairwise Hazard Ratios by Recipient Age Groups"
) +
my.theme
```
# Cox Model: Age Recipient (Categorical)
```{r, fig.width=7, fig.height=7}
km.age.rec.group |>
ggsurvfit(type = "survival") +
@ -472,34 +744,6 @@ km.age.rec.group |>
my.theme
```
```{r, fig.width=7, fig.height=7}
eme.age.rec.group$contrast |>
as.data.frame() |>
select(contrast, ratio, SE, p.value) |>
mutate(
contrast = factor(contrast),
log.ratio = log(ratio),
SE.log = SE / ratio,
low = exp(log.ratio - 1.96 * SE.log),
up = exp(log.ratio + 1.96 * SE.log)
) |>
ggplot(aes(y = reorder(contrast, ratio))) +
geom_errorbarh(aes(xmin = low, xmax = up)) +
geom_point(aes(x = ratio)) +
geom_vline(xintercept = 1, linetype = "dashed") +
scale_y_discrete(
labels = function(x) {
gsub("[()]", "", x = x)
}
) +
labs(
x = "Hazard Ratio",
y = "Recipient Age Group Comparison",
title = "Pairwise Hazard Ratios by Recipient Age Groups"
) +
my.theme
```
# Identifying Predictors
```{r}
@ -528,25 +772,24 @@ test.new.var <- function(old.vars, new.var, i=1) {
```{r, echo=TRUE}
old.vars <- c()
test.new.var(old.vars, "tx.type", 1)
test.new.var(old.vars, "hla.match", 1)
test.new.var(old.vars, "age.donor", 1)
test.new.var(old.vars, "age.rec", 1)
test.new.var(old.vars, "cold.isc", 1)
test.new.var(old.vars, "sex", 1)
test.new.var(old.vars, "age.donor", 1)
test.new.var(old.vars, "year", 1)
test.new.var(old.vars, "hla.match", 1)
test.new.var(old.vars, "sex", 1)
test.new.var(old.vars, "cold.isc", 1)
```
## `tx.type` + ?
```{r, echo=TRUE}
old.vars <- c("tx.type")
# test.new.var(old.vars, "hla.match", 2:7)
test.new.var(old.vars, "hla.match", 2)
test.new.var(old.vars, "age.donor", 2)
test.new.var(old.vars, "age.rec", 2)
test.new.var(old.vars, "cold.isc", 2)
test.new.var(old.vars, "sex", 2)
test.new.var(old.vars, "age.donor", 2)
test.new.var(old.vars, "year", 2)
test.new.var(old.vars, "hla.match", 2:7)
test.new.var(old.vars, "sex", 2)
test.new.var(old.vars, "cold.isc", 2)
```
`age.rec` has the smallest $p$-value
@ -556,47 +799,47 @@ test.new.var(old.vars, "year", 2)
```{r, echo=TRUE}
old.vars <- c("tx.type", "age.rec")
test.new.var(old.vars, "hla.match", 3)
test.new.var(old.vars, "age.donor", 3)
test.new.var(old.vars, "cold.isc", 3)
test.new.var(old.vars, "sex", 3)
test.new.var(old.vars, "year", 3)
```
`hla.match` has the smallest $p$-value
## `tx.type` + `age.rec` + `hla.match` + ?
```{r, echo=TRUE}
old.vars <- c("tx.type", "age.rec", "hla.match")
test.new.var(old.vars, "age.donor", 4)
test.new.var(old.vars, "cold.isc", 4)
test.new.var(old.vars, "sex", 4)
test.new.var(old.vars, "year", 4)
test.new.var(old.vars, "hla.match", 3:8)
test.new.var(old.vars, "sex", 3)
test.new.var(old.vars, "cold.isc", 3)
```
`age.donor` has the smallest $p$-value
## `tx.type` + `age.rec` + `hla.match` + `age.donor` + ?
## `tx.type` + `age.rec` + `age.donor` + ?
```{r, echo=TRUE}
old.vars <- c("tx.type", "age.rec", "hla.match", "age.donor")
old.vars <- c("tx.type", "age.rec", "age.donor")
test.new.var(old.vars, "cold.isc", 5)
test.new.var(old.vars, "sex", 5)
test.new.var(old.vars, "year", 5)
test.new.var(old.vars, "year", 4)
test.new.var(old.vars, "hla.match", 4:9)
test.new.var(old.vars, "sex", 4)
test.new.var(old.vars, "cold.isc", 4)
```
`year` has the smallest $p$-value
## `tx.type` + `age.rec` + `hla.match` + `age.donor` + `year` + ?
## `tx.type` + `age.rec` + `age.donor` + `year` + ?
```{r, echo=TRUE}
old.vars <- c("tx.type", "age.rec", "hla.match", "age.donor", "year")
old.vars <- c("tx.type", "age.rec", "age.donor", "year")
test.new.var(old.vars, "cold.isc", 6)
test.new.var(old.vars, "sex", 6)
test.new.var(old.vars, "hla.match", 5:10)
test.new.var(old.vars, "sex", 5)
test.new.var(old.vars, "cold.isc", 5)
```
`hla.match` has the smallest $p$-value
## `tx.type` + `age.rec` + `age.donor` + `year` + `hla.match` + ?
```{r, echo=TRUE}
old.vars <- c("tx.type", "age.rec", "age.donor", "year", "hla.match")
test.new.var(old.vars, "sex", 11)
test.new.var(old.vars, "cold.isc", 11)
```
Neither has $p$-value less than 0.05. Stop.
@ -604,16 +847,14 @@ Neither has $p$-value less than 0.05. Stop.
# Full Model
```{r}
surv.full <- Surv(follow.up + death) ~ tx.type + age.rec + hla.match +
age.donor + year
surv.full <- Surv(follow.up, death) ~ tx.type + age.rec + age.donor + year +
hla.match
cox.full <- coxph(surv.full, data = dat, method = "breslow")
```
```{r, echo=TRUE}
cox.full
summary(cox.full)
```
```{r, fig.width=6, fig.height=6}
print(1)
tidy(cox.full, exponentiate = TRUE, conf.int = TRUE) |>
mutate(
term = recode(
@ -641,14 +882,60 @@ tidy(cox.full, exponentiate = TRUE, conf.int = TRUE) |>
my.theme
```
## What happened to `year`?
# Proportional hazards assumption
```{r}
ggplot(data = dat) +
geom_boxplot(aes(x = factor(year), y = follow.up))
cox.full
cox.zph(cox.full)
```
```{r}
```{r, fig.width=15, fig.height=10}
ph_plots <- ggcoxzph(
cox.zph(cox.full), ggtheme = theme_minimal(), point.size = 5, size = 30
)
var.names <- c("Transplant Type", "Recipient Age", "Donor Age", "Year",
"HLA Match")
my_ph_theme <- theme(
legend.position = "bottom",
plot.title = element_text(size = 40, face = "bold"),
axis.title = element_text(size = 35),
axis.text = element_text(size = 30),
legend.text = element_text(size = 20)
)
for (i in seq_along(ph_plots)) {
title <- paste0("Scaled Schoenfeld Residuals: ", var.names[i])
single_plot <- ph_plots[[i]] + my_ph_theme + labs(title = title)
print(single_plot)
}
```
## What happened to `year`?
```{r, fig.width=10, fig.height=7}
ggplot(data = dat) +
geom_boxplot(aes(x = factor(year), y = follow.up)) +
my.theme
```
```{r, fig.width=10, fig.height=7}
ggplot(data = dat) +
geom_boxplot(aes(x = factor(year), y = follow.up, color = factor(death))) +
my.theme
```
```{r, fig.width=7, fig.height=5}
ggplot(data = dat) +
geom_boxplot(aes(x = factor(year), y = follow.up)) +
facet_grid(death ~ .)
@ -659,8 +946,8 @@ Probabily want to omit it
## Without `year`
```{r}
surv.full.1 <- Surv(follow.up + death) ~ tx.type + age.rec + hla.match +
age.donor
surv.full.1 <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match +
age.donor
cox.full.1 <- coxph(surv.full.1, data = dat, method = "breslow")
```
@ -706,25 +993,16 @@ hr.compare <- bind_rows(
mutate(model = "Model without year")
) |>
mutate(
term = recode(
term,
"tx.typeLiving" = "Living donor",
"age.rec" = "Recipient age",
"hla.match" = "HLA match",
"age.donor" = "Donor age",
"year" = "Transplant year"
),
term = factor(
term,
levels = c(
"Transplant year",
"Living donor",
"Recipient age",
"Donor age",
"HLA match"
)
term = case_when(
term == "tx.typeLiving" ~ "Living donor",
term == "age.rec" ~ "Recipient age",
term == "age.donor" ~ "Donor age",
term == "year" ~ "Transplant year",
TRUE ~ term
)
)
hr.compare
ggplot(hr.compare, aes(x = estimate, y = term, color = model)) +
geom_vline(xintercept = 1, linetype = "dashed") +
geom_errorbarh(
@ -746,15 +1024,6 @@ ggplot(hr.compare, aes(x = estimate, y = term, color = model)) +
my.theme
```
---
TODO: write a more generic function (UPDATE: FAILED)
@ -869,7 +1138,7 @@ dat.accum |>
```
```{r}
dat[dat$death == 1,] |>
dat[dat$death == 1, ] |>
ggplot(aes(x = follow.up)) +
geom_histogram(bins = 50)
```