discussion 2
This commit is contained in:
parent
7b94e4bf64
commit
bce631d9e0
2 changed files with 377 additions and 23 deletions
5
.lintr
Normal file
5
.lintr
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
linters: lintr::linters_with_defaults(
|
||||
object_name_linter = lintr::object_name_linter(
|
||||
styles = c("snake_case", "dotted.case")
|
||||
)
|
||||
)
|
||||
395
report.Rmd
395
report.Rmd
|
|
@ -8,20 +8,34 @@ number-sections: true
|
|||
|
||||
```{r}
|
||||
library(tidyverse)
|
||||
library(dplyr)
|
||||
library(ggplot2)
|
||||
library(survival)
|
||||
library(emmeans)
|
||||
library(foreign)
|
||||
# library(gtsummary)
|
||||
```
|
||||
|
||||
```{r}
|
||||
dat <- read.csv("./unos.txt", sep = "\t")
|
||||
head(dat)
|
||||
```
|
||||
|
||||
```{r}
|
||||
names(dat) <- c("hla.match", "age.donor", "age.rec", "cold.isc", "death",
|
||||
"year", "sex", "tx.type", "follow.up")
|
||||
```
|
||||
|
||||
Complete case
|
||||
|
||||
```{r}
|
||||
nrow(dat)
|
||||
```
|
||||
|
||||
|
||||
```{r}
|
||||
sapply(names(dat), function(col) {
|
||||
is.na(dat[, col]) |> sum()
|
||||
})
|
||||
```
|
||||
|
||||
|
||||
|
||||
# Exercise
|
||||
|
|
@ -47,6 +61,10 @@ g + geom_boxplot(aes(x = age.rec, y = age.donor, group = age.donor))
|
|||
# dat$age.1 |> table()
|
||||
```
|
||||
|
||||
```{r}
|
||||
g + geom_boxplot(aes(x = year, y = follow.up, group = year))
|
||||
```
|
||||
|
||||
## Exercise 2
|
||||
|
||||
Plot the Kaplan-Meier overall survival curve for pediatric kid-
|
||||
|
|
@ -115,7 +133,7 @@ print(nrow(dat))
|
|||
dat.5.life
|
||||
|
||||
```
|
||||
|
||||
n.removed.acc
|
||||
```{r}
|
||||
dat.5.life <- dat.5.life |>
|
||||
mutate(
|
||||
|
|
@ -128,8 +146,8 @@ dat.5.life <- dat.5.life |>
|
|||
|
||||
|
||||
```{r}
|
||||
get_life_table = function(dat) {
|
||||
dat <- dat |>
|
||||
get_life_table = function(dat.sub, dat) {
|
||||
dat.sub <- dat.sub |>
|
||||
group_by(fu.interval) |>
|
||||
summarize(
|
||||
n.censored = sum(death == 0),
|
||||
|
|
@ -137,24 +155,24 @@ get_life_table = function(dat) {
|
|||
n.at.risk = nrow(dat),
|
||||
)
|
||||
|
||||
for (i in 2:nrow(dat)) {
|
||||
for (i in 2:nrow(dat.sub)) {
|
||||
j <- i - 1
|
||||
|
||||
n.censored.pre <- dat$n.censored[j]
|
||||
n.event.pre <- dat$n.event[j]
|
||||
n.at.risk.pre <- dat$n.at.risk[j]
|
||||
n.censored.pre <- dat.sub$n.censored[j]
|
||||
n.event.pre <- dat.sub$n.event[j]
|
||||
n.at.risk.pre <- dat.sub$n.at.risk[j]
|
||||
|
||||
n.at.risk <- n.at.risk.pre - n.event.pre - n.censored.pre
|
||||
|
||||
dat$n.at.risk[i] <- n.at.risk
|
||||
dat.sub$n.at.risk[i] <- n.at.risk
|
||||
}
|
||||
|
||||
dat <- dat |>
|
||||
dat.sub <- dat.sub |>
|
||||
mutate(
|
||||
hazard.rate = n.event / n.at.risk
|
||||
)
|
||||
|
||||
return(dat)
|
||||
return(dat.sub)
|
||||
}
|
||||
```
|
||||
|
||||
|
|
@ -164,12 +182,12 @@ dat.5.tx1 = dat.5[dat.5$tx.type == 1, ]
|
|||
```
|
||||
|
||||
```{r}
|
||||
tx0.life <- get_life_table(dat.5.tx0)
|
||||
tx0.life <- get_life_table(dat.5.tx0, dat[dat$tx.type == 0, ])
|
||||
tx0.life
|
||||
```
|
||||
|
||||
```{r}
|
||||
tx1.life <- get_life_table(dat.5.tx1)
|
||||
tx1.life <- get_life_table(dat.5.tx1, dat[dat$tx.type == 1, ])
|
||||
tx1.life
|
||||
```
|
||||
|
||||
|
|
@ -195,6 +213,159 @@ ggplot(hazard.df, aes(x = fu.interval)) +
|
|||
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
General Solution:
|
||||
|
||||
```{r}
|
||||
get.life.table <- function(dat, time.intervals) {
|
||||
n.pop <- nrow(dat)
|
||||
|
||||
dat |>
|
||||
recode.dat(time.intervals) |>
|
||||
group_by(fu.interval) |>
|
||||
summarize(
|
||||
n.censored = sum(.data$death == 0),
|
||||
n.event = sum(.data$death),
|
||||
) |>
|
||||
ungroup() |>
|
||||
calculate.hazard(n.pop)
|
||||
}
|
||||
|
||||
get.life.table.by.groups <- function(dat, time.intervals, grps) {
|
||||
grps |>
|
||||
lapply(function(grp) {
|
||||
dat |>
|
||||
get.life.table.by.group(time.intervals, grp) |>
|
||||
mutate(
|
||||
grp.name = grp,
|
||||
grp.value = pick(1)[[1]]
|
||||
) |>
|
||||
select(-1)
|
||||
}) |>
|
||||
bind_rows()
|
||||
}
|
||||
|
||||
get.life.table.by.group <- function(dat, time.intervals, grp) {
|
||||
dat |>
|
||||
recode.dat(time.intervals) |>
|
||||
group_by(fu.interval, .data[[grp]]) |>
|
||||
summarize(
|
||||
n.censored = sum(.data$death == 0),
|
||||
n.event = sum(.data$death),
|
||||
.groups = "keep"
|
||||
) |>
|
||||
ungroup(fu.interval) |>
|
||||
group_modify(function(df.sub, grp) {
|
||||
grp.name <- names(grp)
|
||||
grp.value <- grp[[1]]
|
||||
n.pop <- (dat[[grp.name]] == grp.value) |> sum()
|
||||
calculate.hazard(df.sub, n.pop)
|
||||
}) |>
|
||||
ungroup()
|
||||
}
|
||||
|
||||
calculate.hazard <- function(life.table, n.pop) {
|
||||
n.removed <- life.table$n.event + life.table$n.censored
|
||||
n.removed.accum <- c(0, cumsum(n.removed)[-length(n.removed)])
|
||||
life.table |>
|
||||
mutate(
|
||||
n.at.risk = n.pop - n.removed.accum,
|
||||
# TODO: how to account for censored? How do we adjust for uneven interval?
|
||||
hazard.rate = n.event / n.at.risk
|
||||
)
|
||||
}
|
||||
|
||||
recode.dat <- function(dat, time.intervals) {
|
||||
df <- dat[dat$follow.up <= sum(time.intervals), ]
|
||||
time.points <- cumsum(time.intervals)
|
||||
df$fu.interval <- sapply(df$follow.up, function(time) {
|
||||
time.points[time <= time.points][1]
|
||||
})
|
||||
|
||||
df
|
||||
}
|
||||
|
||||
|
||||
|
||||
```
|
||||
|
||||
```{r}
|
||||
time.intervals <- c(1/3, 1/3, 1/3, 1, 1, 1, 1)
|
||||
grps <- c("tx.type", "sex", "hla.match", "cold.isc")
|
||||
|
||||
get.life.table.by.groups(dat, time.intervals, grps)
|
||||
```
|
||||
|
||||
```{r}
|
||||
grps <- c("tx.type", "sex", "hla.match", "cold.isc")
|
||||
grps <- c("tx.type")
|
||||
|
||||
|
||||
plot.hazard.rate.by.groups <- function(dat, time.intervals, grps) {
|
||||
dat |>
|
||||
get.life.table.by.groups(time.intervals, grps) |>
|
||||
ggplot(aes(x = fu.interval, y = hazard.rate,
|
||||
color = paste(grp.name, grp.value, sep = " = "))) +
|
||||
geom_point() +
|
||||
geom_line() +
|
||||
labs(
|
||||
color = "Group"
|
||||
) +
|
||||
theme(
|
||||
legend.position = "bottom"
|
||||
)
|
||||
}
|
||||
|
||||
```
|
||||
```{r}
|
||||
time.intervals <- c(rep(1/3, 3), rep(1, 4))
|
||||
plot.hazard.rate.by.groups(dat, time.intervals, c("tx.type"))
|
||||
```
|
||||
|
||||
```{r}
|
||||
time.intervals <- rep(1, 5)
|
||||
plot.hazard.rate.by.groups(dat, time.intervals, c("tx.type"))
|
||||
```
|
||||
|
||||
```{r}
|
||||
time.intervals <- rep(1/3, 15)
|
||||
plot.hazard.rate.by.groups(dat, time.intervals, c("tx.type"))
|
||||
```
|
||||
|
||||
```{r}
|
||||
time.intervals <- c(rep(1/3, 3*3), rep(1, 2))
|
||||
plot.hazard.rate.by.groups(dat, time.intervals, c("tx.type"))
|
||||
```
|
||||
|
||||
```{r}
|
||||
time.intervals <- c(rep(1/4, 4), rep(1, 4))
|
||||
plot.hazard.rate.by.groups(dat, time.intervals, c("tx.type"))
|
||||
```
|
||||
|
||||
```{r}
|
||||
time.intervals <- c(rep(1/4, 4*2), rep(1, 3))
|
||||
plot.hazard.rate.by.groups(dat, time.intervals, c("tx.type"))
|
||||
```
|
||||
|
||||
```{r}
|
||||
time.intervals <- c(rep(1/4, 4*2), rep(1/3, 3*3), rep(1, 6))
|
||||
plot.hazard.rate.by.groups(dat, time.intervals, c("tx.type"))
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
Compare
|
||||
|
||||
```{r}
|
||||
time.intervals <- c(1/3, 1/3, 1/3, 1, 1, 1, 1)
|
||||
get.life.table.by.groups(dat, time.intervals, grps)
|
||||
```
|
||||
|
||||
```{r}
|
||||
hazard.df
|
||||
```
|
||||
|
||||
|
||||
|
||||
## Exercise 4
|
||||
|
|
@ -214,31 +385,209 @@ Fit a univariate Cox model with predictor donor type. Report
|
|||
the hazard ratio and 95% confidence interval and interpret the result obtained.
|
||||
|
||||
```{r}
|
||||
cox <- coxph(Surv(follow.up, death) ~ tx.type, data = dat)
|
||||
summary(cox)
|
||||
cox.tx <- coxph(Surv(follow.up, death) ~ tx.type, data = dat)
|
||||
summary(cox.tx)
|
||||
```
|
||||
|
||||
```{r}
|
||||
1.90539
|
||||
log.hazard.ratio.hat <- cox.tx$coefficient
|
||||
hazard.ratio.hat <- log.hazard.ratio.hat |> exp()
|
||||
|
||||
```
|
||||
|
||||
```{r}
|
||||
1.90539 + 1.96 * exp(0.09558)
|
||||
(2.298 - 1.58) / 2 |> exp() / 2
|
||||
log.hazard.ratio.se <- summary(cox.tx)$coefficients[3]
|
||||
log.hazard.ratio.hat.lower.bound <- log.hazard.ratio.hat - qnorm(0.975) * log.hazard.ratio.se
|
||||
log.hazard.ratio.hat.upper.bound <- log.hazard.ratio.hat + qnorm(0.975) * log.hazard.ratio.se
|
||||
|
||||
c(log.hazard.ratio.hat.lower.bound, log.hazard.ratio.hat.upper.bound) |> exp()
|
||||
```
|
||||
|
||||
Exercise 6 — Research shows that an important determinant of mortality
|
||||
::: {.callout-note}
|
||||
Patients receiving kidney transplants from living donors (`tx.type = 1`) has
|
||||
1.91 higher hazard rate than those receiving from cadaveric donors
|
||||
(`tx.type = 0`).
|
||||
|
||||
Since 1 is not in the 95% confidence interval, the result is statistically
|
||||
significant.
|
||||
:::
|
||||
|
||||
|
||||
# Exercise 6
|
||||
|
||||
Research shows that an important determinant of mortality
|
||||
after kidney transplant is the age of the recipient. Fit a Cox model with age
|
||||
as predictor and estimate the hazard ratio and its confidence interval. Consider
|
||||
age first as continuous variable and then divide into categories.
|
||||
|
||||
Exercise 7 — Fit a multivariate Cox model by using other predictors and
|
||||
## Continuous
|
||||
|
||||
```{r}
|
||||
cox.age.rec <- coxph(Surv(follow.up, death) ~ age.rec, data = dat)
|
||||
cox.age.rec |> summary()
|
||||
```
|
||||
|
||||
## Categorical (Case 1)
|
||||
|
||||
```{r}
|
||||
recode.age <- function(dat, ages) {
|
||||
df$age.group <- sapply(df$follow.up, function(time) {
|
||||
time.points[time <= time.points][1]
|
||||
})
|
||||
|
||||
df
|
||||
}
|
||||
```
|
||||
|
||||
```{r}
|
||||
dat.age.rec <- dat[!is.na(dat$age.rec), ]
|
||||
class.age <- function(age) {
|
||||
if (age < 2) {
|
||||
return("0,1")
|
||||
} else if (age < 6) {
|
||||
return("2,3,4,5")
|
||||
} else if (age < 11) {
|
||||
return("6,7,8,9,10")
|
||||
} else if (age < 19) {
|
||||
return("11-18")
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
```{r}
|
||||
dat.age.rec$age.group <- sapply(dat.age.rec$age.rec, class.age) |>
|
||||
factor(levels = c(
|
||||
"0,1",
|
||||
"2,3,4,5",
|
||||
"6,7,8,9,10",
|
||||
"11-18"
|
||||
))
|
||||
|
||||
|
||||
cox.age.rec <- coxph(Surv(follow.up, death) ~ age.group, data = dat.age.rec)
|
||||
cox.age.rec |> summary()
|
||||
```
|
||||
|
||||
```{r}
|
||||
emmeans(cox.age.rec, pairwise ~ age.group, type = "response")
|
||||
```
|
||||
|
||||
|
||||
# Exercise 7
|
||||
|
||||
Fit a multivariate Cox model by using other predictors and
|
||||
describe your results.
|
||||
|
||||
Exercise 8 — Estimate the survival function for specific covariate patterns.
|
||||
|
||||
```{r}
|
||||
sapply(names(dat), function(col) {
|
||||
is.na(dat[, col]) |> sum()
|
||||
})
|
||||
```
|
||||
|
||||
```{r}
|
||||
names(dat)
|
||||
```
|
||||
|
||||
```{r}
|
||||
Waldtest <- function(cox.fit, i.betas){
|
||||
q <- length(i.betas)
|
||||
coefs <- cox.fit$coefficients
|
||||
# print(cox.fit$var)
|
||||
var <- cox.fit$var[i.betas, i.betas]
|
||||
Wald <- coefs[i.betas]%*%solve(var)%*%coefs[i.betas]
|
||||
p.value <- 1-pchisq(as.numeric(Wald), df = q)
|
||||
return(cbind(Wald, p.value))
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
```{r}
|
||||
fit.cox.model <- function(surv, i.betas) {
|
||||
fit.tx <- coxph(surv, data = dat)
|
||||
Waldtest(fit.tx, i.betas)
|
||||
}
|
||||
```
|
||||
|
||||
Start with `tx.type`
|
||||
|
||||
```{r}
|
||||
surv <- Surv(follow.up, death) ~ tx.type + age.rec
|
||||
fit.cox.model(surv, 1:2) # NOTE: Why is this 1???
|
||||
```
|
||||
|
||||
Iteration 1: `tx.type + age + ?`
|
||||
|
||||
```{r}
|
||||
names(dat)
|
||||
```
|
||||
|
||||
```{r}
|
||||
surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match
|
||||
fit.cox.model(surv, 3)
|
||||
surv <- Surv(follow.up, death) ~ tx.type + age.rec + age.donor
|
||||
fit.cox.model(surv, 3)
|
||||
surv <- Surv(follow.up, death) ~ tx.type + age.rec + cold.isc
|
||||
fit.cox.model(surv, 3)
|
||||
surv <- Surv(follow.up, death) ~ tx.type + age.rec + year
|
||||
fit.cox.model(surv, 3)
|
||||
surv <- Surv(follow.up, death) ~ tx.type + age.rec + sex
|
||||
fit.cox.model(surv, 3)
|
||||
```
|
||||
|
||||
Add `hla.match`
|
||||
|
||||
Iteration 2: `tx.type + age.rec + hla.match + ?`
|
||||
|
||||
```{r}
|
||||
surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match + age.donor
|
||||
fit.cox.model(surv, 4)
|
||||
surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match + cold.isc
|
||||
fit.cox.model(surv, 4)
|
||||
surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match + year
|
||||
fit.cox.model(surv, 4)
|
||||
surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match + sex
|
||||
fit.cox.model(surv, 4)
|
||||
```
|
||||
|
||||
Add `age.donor`
|
||||
|
||||
Iteration 2: `tx.type + age.rec + hla.match + age.donor + ?`
|
||||
|
||||
```{r}
|
||||
surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match + age.donor +
|
||||
cold.isc
|
||||
fit.cox.model(surv, 5)
|
||||
surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match + age.donor +
|
||||
year
|
||||
fit.cox.model(surv, 5)
|
||||
surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match + age.donor +
|
||||
sex
|
||||
fit.cox.model(surv, 5)
|
||||
```
|
||||
|
||||
Add `year`
|
||||
|
||||
```{r}
|
||||
surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match + age.donor +
|
||||
year + cold.isc
|
||||
fit.cox.model(surv, 6)
|
||||
surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match + age.donor +
|
||||
year + sex
|
||||
fit.cox.model(surv, 6)
|
||||
```
|
||||
|
||||
|
||||
|
||||
Next, we test for each remaining variables
|
||||
|
||||
# Exercise 8
|
||||
|
||||
Estimate the survival function for specific covariate patterns.
|
||||
Based on the previous results choose the best predictors.
|
||||
|
||||
Exercise 9 — Check the proportional hazards assumption. You may use the
|
||||
function cox.zph. Discuss the result and possible implications.
|
||||
|
||||
Exercise 10 — Plot the Schoenfeld residuals and comment.
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue