This commit is contained in:
Louis Chih-Ming Lee 2026-05-20 16:54:59 +02:00
parent 1caa283058
commit cadad4a4ba
5 changed files with 7071 additions and 4344 deletions

3
.lintr
View file

@ -1,5 +1,6 @@
linters: lintr::linters_with_defaults( linters: lintr::linters_with_defaults(
object_name_linter = lintr::object_name_linter( object_name_linter = lintr::object_name_linter(
styles = c("snake_case", "dotted.case") styles = c("snake_case", "dotted.case")
) ),
trailing_whitespace_linter = NULL
) )

1466
report.Rmd

File diff suppressed because it is too large Load diff

File diff suppressed because one or more lines are too long

View file

@ -21,6 +21,7 @@ library(gt)
library(ggsurvfit) library(ggsurvfit)
library(gridExtra) library(gridExtra)
library(survminer) library(survminer)
library(broom)
``` ```
@ -32,9 +33,9 @@ dat <- dat |>
mutate( mutate(
sex = factor(sex, levels = c(0,1), labels = c("Female","Male")), sex = factor(sex, levels = c(0,1), labels = c("Female","Male")),
tx.type = factor(tx.type, levels = c(0,1), labels = c("Cadaveric","Living")), tx.type = factor(tx.type, levels = c(0,1), labels = c("Cadaveric","Living")),
hla.match = factor(hla.match), # hla.match = factor(hla.match),
year = factor(year) # year = factor(year)
) )
``` ```
```{r} ```{r}
@ -244,11 +245,11 @@ plot.tx.donor
# Overall Kaplan-Meier Curve # Overall Kaplan-Meier Curve
```{r} ```{r}
km_all <- survfit(Surv(follow.up, death) ~ 1, data = dat) km.all <- survfit(Surv(follow.up, death) ~ 1, data = dat)
``` ```
```{r, fig.width=7, fig.height=5} ```{r, fig.width=7, fig.height=5}
km_all |> km.all |>
ggsurvfit(type = "survival") + ggsurvfit(type = "survival") +
add_confidence_interval() + add_confidence_interval() +
scale_y_continuous(limits = c(0, 1)) + scale_y_continuous(limits = c(0, 1)) +
@ -261,7 +262,74 @@ km_all |>
``` ```
```{r} ```{r}
summary(km_all, times = c(0, 4, 8, 12)) header_bg <- "#A83262" # burgundy/magenta from right panel
header_text <- "white"
body_bg <- "#FFF4CC" # light cream
stripe_bg <- "#F6E8C3" # slightly darker cream
text_color <- "#333333"
border_col <- "#D8C8A5"
```
```{r}
times <- seq(0, 12, 2)
summary(km.all, times = times)[c("time", "n.risk", "surv")] |>
as.data.frame() |>
gt() |>
tab_header(
title = "Overall Kaplan-Meier Survival Summary"
) |>
cols_label(
time = "Follow-up Time",
n.risk = "Number at Risk",
surv = "Survival Probability"
) |>
fmt_number(
columns = surv,
decimals = 3
) |>
tab_options(
table.font.size = px(15),
heading.title.font.size = px(20),
column_labels.font.weight = "bold"
)
```
```{r}
summary(km.all, times = times)[c("time", "n.risk", "surv")] |>
as.data.frame() |>
gt() |>
tab_header(
title = "Overall Kaplan-Meier Survival Summary"
) |>
cols_label(
time = "Follow-up Time",
n.risk = "Number at Risk",
surv = "Survival Probability"
) |>
fmt_number(columns = surv, decimals = 3) |>
fmt_number(columns = time, decimals = 0) |>
tab_style(
style = list(
cell_fill(color = "#A83262"),
cell_text(color = "white", weight = "bold")
),
locations = cells_column_labels()
) |>
tab_style(
style = cell_fill(color = "#FFF4CC"),
locations = cells_body()
) |>
opt_row_striping(row_striping = TRUE) |>
tab_options(
row.striping.background_color = "#F6E8C3",
table.border.top.color = "#A83262",
table.border.bottom.color = "#A83262",
column_labels.border.bottom.color = "#A83262",
table.font.size = px(16),
heading.title.font.size = px(18),
table.background.color = "#FFF4CC"
)
``` ```
# Cox Model (with intervals?) # Cox Model (with intervals?)
@ -434,29 +502,229 @@ eme.age.rec.group$contrast |>
# Identifying Predictors # Identifying Predictors
TODO: write a more generic function ```{r}
# From exercise 9
Waldtest <- function(coxph.object, idx) {
b <- coxph.object$coef[idx]
Sig <- coxph.object$var[idx, idx]
Wald <- b %*% solve(Sig) %*% b
p <- 1 - pchisq(as.numeric(Wald), df = length(idx))
return(data.frame(Wald = Wald, df = length(idx), p = p))
}
test.new.var <- function(old.vars, new.var, i=1) {
new.surv <- reformulate(
termlabels = c(old.vars, new.var),
response = "Surv(follow.up, death)"
)
cox.fit <- coxph(new.surv, data = dat, method = "breslow")
print(cox.fit)
Waldtest(cox.fit, i)
}
```
## Start with nothing
```{r, echo=TRUE}
old.vars <- c()
test.new.var(old.vars, "tx.type", 1)
test.new.var(old.vars, "hla.match", 1)
test.new.var(old.vars, "age.donor", 1)
test.new.var(old.vars, "age.rec", 1)
test.new.var(old.vars, "cold.isc", 1)
test.new.var(old.vars, "sex", 1)
test.new.var(old.vars, "year", 1)
```
## `tx.type` + ?
```{r, echo=TRUE}
old.vars <- c("tx.type")
# test.new.var(old.vars, "hla.match", 2:7)
test.new.var(old.vars, "hla.match", 2)
test.new.var(old.vars, "age.donor", 2)
test.new.var(old.vars, "age.rec", 2)
test.new.var(old.vars, "cold.isc", 2)
test.new.var(old.vars, "sex", 2)
test.new.var(old.vars, "year", 2)
```
`age.rec` has the smallest $p$-value
## `tx.type` + `age.rec` + ?
```{r, echo=TRUE}
old.vars <- c("tx.type", "age.rec")
test.new.var(old.vars, "hla.match", 3)
test.new.var(old.vars, "age.donor", 3)
test.new.var(old.vars, "cold.isc", 3)
test.new.var(old.vars, "sex", 3)
test.new.var(old.vars, "year", 3)
```
`hla.match` has the smallest $p$-value
## `tx.type` + `age.rec` + `hla.match` + ?
```{r, echo=TRUE}
old.vars <- c("tx.type", "age.rec", "hla.match")
test.new.var(old.vars, "age.donor", 4)
test.new.var(old.vars, "cold.isc", 4)
test.new.var(old.vars, "sex", 4)
test.new.var(old.vars, "year", 4)
```
`age.donor` has the smallest $p$-value
## `tx.type` + `age.rec` + `hla.match` + `age.donor` + ?
```{r, echo=TRUE}
old.vars <- c("tx.type", "age.rec", "hla.match", "age.donor")
test.new.var(old.vars, "cold.isc", 5)
test.new.var(old.vars, "sex", 5)
test.new.var(old.vars, "year", 5)
```
`year` has the smallest $p$-value
## `tx.type` + `age.rec` + `hla.match` + `age.donor` + `year`
```{r, echo=TRUE}
old.vars <- c("tx.type", "age.rec", "hla.match", "age.donor", "year")
test.new.var(old.vars, "cold.isc", 6)
test.new.var(old.vars, "sex", 6)
```
Neither has $p$-value less than 0.05. Stop.
# Full Model
```{r} ```{r}
surv.full <- Surv(follow.up + death) ~ tx.type + age.rec + hla.match +
age.donor + year
cox.full <- coxph(surv.full, data = dat, method = "breslow")
```
```{r, echo=TRUE}
cox.full
```
```{r, fig.width=7, fig.height=5}
tidy(cox.full, exponentiate = TRUE, conf.int = TRUE) |>
mutate(
term = recode(
term,
"tx.typeLiving" = "Living donor",
"age.rec" = "Recipient age",
"hla.match" = "HLA match",
"age.donor" = "Donor age",
"year" = "Transplant year"
)
) |>
ggplot(aes(x = estimate, y = reorder(term, estimate))) +
geom_vline(xintercept = 1, linetype = "dashed") +
geom_errorbarh(
aes(xmin = conf.low, xmax = conf.high),
height = 0.2
) +
scale_x_log10() +
geom_point(size = 2) +
labs(
title = "Hazard Ratios from Full Cox Model",
x = "Hazard Ratio",
y = NULL
) +
my.theme
```
## What happened to `year`?
```{r}
ggplot(data = dat) +
geom_boxplot(aes(x = factor(year), y = follow.up))
```
```{r}
ggplot(data = dat) +
geom_boxplot(aes(x = factor(year), y = follow.up)) +
facet_grid(death ~ .)
```
Probabily want to omit it
## Without `year`
```{r}
surv.full.1 <- Surv(follow.up + death) ~ tx.type + age.rec + hla.match +
age.donor
cox.full.1 <- coxph(surv.full.1, data = dat, method = "breslow")
```
```{r, echo=TRUE}
cox.full.1
```
```{r, fig.width=7, fig.height=5}
tidy(cox.full.1, exponentiate = TRUE, conf.int = TRUE) |>
mutate(
term = recode(
term,
"tx.typeLiving" = "Living donor",
"age.rec" = "Recipient age",
"hla.match" = "HLA match",
"age.donor" = "Donor age",
)
) |>
ggplot(aes(x = estimate, y = reorder(term, estimate))) +
geom_vline(xintercept = 1, linetype = "dashed") +
geom_errorbarh(
aes(xmin = conf.low, xmax = conf.high),
height = 0.2
) +
scale_x_log10() +
geom_point(size = 2) +
labs(
title = "Hazard Ratios from Full Cox Model (Without Year)",
x = "Hazard Ratio",
y = NULL
) +
my.theme
```
---
TODO: write a more generic function (UPDATE: FAILED)
<details>
```{r, eval=FALSE}
# WARNING: BAD, DO NOT USE.
vars <- c("hla.match", "age.donor", "age.rec", "cold.isc", "year", "sex",
"tx.type")
included <- vars == "tx.type"
Waldtest <- function(cox.fit, i.betas){ Waldtest <- function(cox.fit, i.betas){
q <- length(i.betas) q <- length(i.betas)
coefs <- cox.fit$coefficients coefs <- cox.fit$coefficients
var <- cox.fit$var[i.betas, i.betas] var <- cox.fit$var[i.betas, i.betas]
Wald <- coefs[i.betas]%*%solve(var)%*%coefs[i.betas] Wald <- coefs[i.betas]%*%solve(var)%*%coefs[i.betas]
p.value <- 1-pchisq(as.numeric(Wald), df = q) p.value <- 1-pchisq(as.numeric(Wald), df = q)
print(p.value)
return(p.value) return(p.value)
} }
```
```{r}
vars <- c("hla.match", "age.donor", "age.rec", "cold.isc", "year", "sex",
"tx.type")
included <- vars == "tx.type"
included <- c(FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, TRUE)
included <- c(FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, TRUE)
find.best.surv <- function(included) { find.model.vars <- function(included) {
print('---')
test.vars <- vars[!included] test.vars <- vars[!included]
p.values <- sapply(test.vars, function(test.var) { p.values <- sapply(test.vars, function(test.var) {
new.surv <- reformulate( new.surv <- reformulate(
@ -464,31 +732,24 @@ find.best.surv <- function(included) {
response = "Surv(follow.up, death)" response = "Surv(follow.up, death)"
) )
fit.tx <- coxph(new.surv, data = dat) fit.tx <- coxph(new.surv, data = dat)
Waldtest(fit.tx, sum(included)) Waldtest(fit.tx, 1)
}) })
if (sum(p.values > 0.05) == sum(included)) { if (all(p.values > 0.05)) {
print(3)
return(included)
}
if (sum(included == TRUE) == length(included)) {
print(2)
return(included) return(included)
} }
print(1) new.var <- names(p.values)[which.min(p.values)]
print(which.min(p.values)) included[which(vars == new.var)] <- TRUE
print(included) return(find.model.vars(included))
print(p.values)
print(which.min(p.values))
included[which.min(p.values)] <- TRUE
print(included)
# find.best.surv(included)
} }
included <- find.model.vars(included)
find.best.surv(included) vars[included]
``` ```
</details>
```{r} ```{r}
# surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match # surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match
# fit.cox.model(surv, 3) # fit.cox.model(surv, 3)

File diff suppressed because it is too large Load diff