git init
This commit is contained in:
commit
7b94e4bf64
5 changed files with 10913 additions and 0 deletions
244
report.Rmd
Normal file
244
report.Rmd
Normal file
|
|
@ -0,0 +1,244 @@
|
|||
---
|
||||
title: foo
|
||||
execute:
|
||||
cache: true
|
||||
freeze: auto
|
||||
number-sections: true
|
||||
---
|
||||
|
||||
```{r}
|
||||
library(tidyverse)
|
||||
library(survival)
|
||||
# library(gtsummary)
|
||||
```
|
||||
|
||||
```{r}
|
||||
# Load the tab-separated UNOS extract from the working directory.
# read.delim() shares read.csv()'s defaults (header, quoting, fill) once the
# separator is a tab.
dat <- read.delim(file = "./unos.txt", sep = "\t")

# Quick look at the first rows to check that the file was parsed as expected.
head(dat)
|
||||
```
|
||||
|
||||
```{r}
|
||||
# Replace the column headers of the raw file with the nine short names used
# throughout the rest of the report (assigned by position).
colnames(dat) <- c(
  "hla.match",
  "age.donor",
  "age.rec",
  "cold.isc",
  "death",
  "year",
  "sex",
  "tx.type",
  "follow.up"
)
|
||||
```
|
||||
|
||||
|
||||
|
||||
# Exercise
|
||||
|
||||
## Exercise 1
|
||||
|
||||
> Illustrate in a table the characteristics of the population (age, sex, race,
|
||||
donor, . . . ).
|
||||
|
||||
```{r}
|
||||
# Base ggplot object bound to `dat`; the following chunk adds one geom at a
# time to it, so every exploratory plot shares the same data.
g <- ggplot(dat)
|
||||
```
|
||||
|
||||
```{r}
|
||||
# Death indicator against follow-up time, then the follow-up distribution.
g + geom_point(aes(x = follow.up, y = death))
g + geom_density(aes(x = follow.up))

# Age distributions of donors and recipients.
g + geom_density(aes(x = age.donor))
g + geom_bar(aes(x = age.rec))

# Donor age by recipient age: one box per recipient age. The boxes must be
# grouped by the x variable; grouping by age.donor (the y variable) gives
# every box a single constant value and therefore zero height.
g + geom_boxplot(aes(x = age.rec, y = age.donor, group = age.rec))

# dat$age.1 |> table()
|
||||
```
|
||||
|
||||
## Exercise 2
|
||||
|
||||
Plot the Kaplan-Meier overall survival curve for pediatric kidney
transplant recipients for the first 12 years after transplantation.
|
||||
|
||||
```{r}
|
||||
# Overall Kaplan-Meier estimate.
# The curve is fitted on ALL recipients and only the x axis is limited to
# 12 years. Filtering on follow.up <= 12 before fitting would remove exactly
# the subjects known to be alive (or still followed) beyond year 12, which
# shrinks the risk sets and biases the estimated survival downward.
km <- survfit(Surv(follow.up, death) ~ 1, data = dat)
plot(
  km,
  xlim = c(0, 12),
  xlab = "Years since transplantation",
  ylab = "Overall survival"
)
|
||||
|
||||
|
||||
|
||||
```
|
||||
|
||||
## Exercise 3
|
||||
|
||||
We are going to compare mortality rates (hazard functions)
|
||||
between children whose transplanted kidney was provided by a living donor
|
||||
(in general a family member) and those whose source was recently deceased
|
||||
(variable donor type: `tx.type`). Use the life table method to calculate the death
|
||||
rates for the first 5 years for each group (take in the first year intervals of 4
|
||||
months and then look at each year) and show the results in a table. Estimate
|
||||
the hazard ratio in each time interval as the ratio between the mortality rates
|
||||
in the two groups. What do you notice?
|
||||
|
||||
```{r}
|
||||
# Restrict the analysis to the first 5 years WITHOUT discarding anyone.
# Recipients followed for more than 5 years were at risk during the whole
# window, so they are kept and administratively censored at 5 years
# (death set to 0, follow-up truncated to 5). Dropping them instead would
# leave only early deaths/drop-outs and make every risk set too small.
horizon <- 5
dat.5 <- dat
beyond.horizon <- which(dat.5$follow.up > horizon)
dat.5$death[beyond.horizon] <- 0
dat.5$follow.up[beyond.horizon] <- horizon

head(dat.5)
|
||||
# Map follow-up times (in years) to the upper bound of their life-table
# interval: three 4-month intervals in the first year (1/3, 2/3, 1) and
# whole years afterwards (2, 3, ...).
#
# fu: numeric vector of follow-up times; a single number works as before.
# Returns a numeric vector of the same length as `fu`. NA stays NA, and
# anything <= 1/3 (including 0) falls into the first interval.
classify_time_interval <- function(fu) {
  # Default: yearly intervals labelled by their upper bound.
  interval.end <- ceiling(fu)

  # Refine the first year, widest bound first so that narrower bounds
  # overwrite it. which() skips NA entries.
  interval.end[which(fu <= 1)] <- 1
  interval.end[which(fu <= 2 / 3)] <- 2 / 3
  interval.end[which(fu <= 1 / 3)] <- 1 / 3

  interval.end
}
|
||||
|
||||
# Label every subject with the life-table interval in which follow-up ended.
# vapply() guarantees a numeric vector even for empty input, whereas sapply()
# would silently return a list and break the column assignment.
dat.5$fu.interval <- vapply(
  dat.5$follow.up,
  classify_time_interval,
  numeric(1)
)

# Number of subjects whose follow-up ends in each interval.
table(dat.5$fu.interval)
head(dat.5)
|
||||
```
|
||||
|
||||
```{r}
|
||||
# Overall life table: one row per follow-up interval.
#   n.censored  subjects whose follow-up ends in the interval without death
#   n.event     deaths in the interval
#   n.at.risk   subjects still under observation at the START of the interval
#
# The risk set starts from the whole cohort, nrow(dat), and loses everyone
# who died or was censored in EARLIER intervals: the cumulative number of
# exits minus the exits of the current interval. This replaces the
# row-by-row loop, which ran backwards (2:1) when only one interval existed.
dat.5.life <- dat.5 |>
  group_by(fu.interval) |>
  summarize(
    n.censored = sum(death == 0),
    n.event = sum(death)
  ) |>
  mutate(
    n.at.risk = nrow(dat) - cumsum(n.event + n.censored) +
      n.event + n.censored
  )

# Cohort size, for checking against the first interval's risk set.
print(nrow(dat))
dat.5.life
|
||||
|
||||
```
|
||||
|
||||
```{r}
|
||||
# Per-interval death proportion: deaths divided by the number of subjects at
# risk at the start of the interval.
dat.5.life$hazard.rate <- dat.5.life$n.event / dat.5.life$n.at.risk
|
||||
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
|
||||
```{r}
|
||||
# Build a life table from subject-level data.
#
# dat: data frame with one row per subject and the columns
#        fu.interval  interval in which the subject's follow-up ends
#        death        1 if the subject died, 0 if censored
#
# Returns a tibble with one row per interval, ordered by fu.interval:
#   n.censored   censored subjects leaving in the interval
#   n.event      deaths in the interval
#   n.at.risk    subjects still under observation at the start of the interval
#   hazard.rate  n.event / n.at.risk
#
# The first risk set is nrow(dat), so `dat` must contain every subject who
# was at risk at time 0, including those still alive at the end of the
# observation window (as censored rows).
#
# NOTE(review): hazard.rate is the proportion dying among those at risk at the
# start of the interval. It is not divided by the interval length (the
# first-year intervals span 4 months, later ones a full year) and carries no
# half-interval correction for censoring; confirm this is the intended
# "death rate" before comparing values across intervals.
get_life_table <- function(dat) {
  # Captured before `dat` is summarised so the cohort size stays available.
  n.total <- nrow(dat)

  dat |>
    group_by(fu.interval) |>
    summarize(
      n.censored = sum(death == 0),
      n.event = sum(death)
    ) |>
    mutate(
      # Everyone minus the exits (deaths + censorings) of EARLIER intervals.
      # Unlike a `for (i in 2:nrow(...))` loop this also works for zero or
      # one interval.
      n.at.risk = n.total - cumsum(n.event + n.censored) +
        n.event + n.censored,
      hazard.rate = n.event / n.at.risk
    )
}
|
||||
```
|
||||
|
||||
```{r}
|
||||
# Split the 5-year data by donor type. which() keeps only rows where the
# comparison is TRUE; a bare logical index would turn every NA in tx.type
# into an all-NA row in both subsets.
dat.5.tx0 <- dat.5[which(dat.5$tx.type == 0), ]
dat.5.tx1 <- dat.5[which(dat.5$tx.type == 1), ]
|
||||
```
|
||||
|
||||
```{r}
|
||||
# Life table for the tx.type == 0 subset, printed for the report.
tx0.life <- dat.5.tx0 |> get_life_table()
tx0.life
|
||||
```
|
||||
|
||||
```{r}
|
||||
# Life table for the tx.type == 1 subset, printed for the report.
tx1.life <- dat.5.tx1 |> get_life_table()
tx1.life
|
||||
```
|
||||
|
||||
|
||||
```{r}
|
||||
# Interval-wise hazard ratio, tx.type == 1 over tx.type == 0.
# NOTE(review): the division pairs rows by position, so it assumes both life
# tables list the same intervals in the same order - confirm before reading
# the values off.
tx1.life$hazard.rate / tx0.life$hazard.rate
|
||||
```
|
||||
|
||||
```{r}
|
||||
# Side-by-side hazard rates of the two donor types and their ratio.
# The tables are joined on fu.interval rather than paired by row position:
# an interval with no exits in one group is absent from that group's life
# table, and positional pairing would then compare different intervals (or
# fail on unequal lengths). Only intervals present in both tables are kept.
hazard.df <- merge(
  tx0.life[, c("fu.interval", "hazard.rate")],
  tx1.life[, c("fu.interval", "hazard.rate")],
  by = "fu.interval",
  suffixes = c(".0", ".1")
)
# Ratio of tx.type == 1 to tx.type == 0.
hazard.df$hazard.ratio <- hazard.df$hazard.rate.1 / hazard.df$hazard.rate.0

# Hazard rates over time: blue = tx.type 0, orange = tx.type 1.
ggplot(hazard.df, aes(x = fu.interval)) +
  geom_line(aes(y = hazard.rate.0), color = "blue") +
  geom_line(aes(y = hazard.rate.1), color = "orange")

# Hazard ratio over time.
ggplot(hazard.df, aes(x = fu.interval)) +
  geom_line(aes(y = hazard.ratio))
|
||||
|
||||
```
|
||||
|
||||
|
||||
|
||||
## Exercise 4
|
||||
|
||||
Show a plot with Kaplan-Meier survival curves for the two donor types.
|
||||
|
||||
```{r}
|
||||
# Kaplan-Meier curves by donor type.
# As for the overall curve, the model is fitted on all recipients and only
# the displayed range is limited to 12 years; subsetting on follow.up <= 12
# would drop the long-term survivors and bias both curves downward.
km.tx <- survfit(Surv(follow.up, death) ~ tx.type, data = dat)

# Colours follow the order of the strata (tx.type = 0, then tx.type = 1).
plot(km.tx, col = c("blue", "orange"), xlim = c(0, 12))
legend(
  "bottomleft",
  legend = c("cadaveric", "living"),
  lwd = 2,
  col = c("blue", "orange")
)
|
||||
```
|
||||
|
||||
## Exercise 5
|
||||
|
||||
Fit a univariate Cox model with predictor donor type. Report
|
||||
the hazard ratio and 95% confidence interval and interpret the result obtained.
|
||||
|
||||
```{r}
|
||||
# Univariate Cox proportional hazards model with donor type as the only
# predictor, fitted on the full data set.
cox <- coxph(Surv(follow.up, death) ~ tx.type, data = dat)
|
||||
# Print the model summary for the fitted Cox model.
summary(cox)
|
||||
```
|
||||
|
||||
```{r}
|
||||
# NOTE(review): hard-coded value, presumably exp(coef) for tx.type copied from
# the summary(cox) output - re-check it whenever the data or model change.
1.90539
|
||||
```
|
||||
|
||||
```{r}
|
||||
# 95% confidence interval for the hazard ratio.
# The interval is symmetric on the log-hazard scale, so it is
#   exp(log(HR) +/- z * se(log HR)) = HR * exp(+/- z * se).
# Adding z * exp(se) to the HR itself mixes the two scales and gives a
# meaningless number.
#
# The two constants are taken to be exp(coef) and se(coef) from the
# summary(cox) output; they reproduce the 1.58-2.298 interval used below.
# exp(confint(cox)) yields the same interval directly from the fitted model.
hr <- 1.90539
se.log.hr <- 0.09558
z <- qnorm(0.975)

hr.ci <- hr * exp(c(lower = -1, upper = 1) * z * se.log.hr)
hr.ci

# Sanity check in the other direction: recover se(log HR) from the reported
# interval, as half the width of the interval on the log scale divided by z.
# (The original `(2.298 - 1.58) / 2 |> exp() / 2` worked on the wrong scale
# and, because `|>` binds tighter than `/`, actually divided by exp(2).)
se.from.ci <- log(2.298 / 1.58) / (2 * z)
se.from.ci
|
||||
```
|
||||
|
||||
Exercise 6 — Research shows that an important determinant of mortality
|
||||
after kidney transplant is the age of the recipient. Fit a Cox model with age
|
||||
as predictor and estimate the hazard ratio and its confidence interval. Consider
|
||||
age first as continuous variable and then divide into categories.
|
||||
|
||||
Exercise 7 — Fit a multivariate Cox model by using other predictors and
|
||||
describe your results.
|
||||
|
||||
Exercise 8 — Estimate the survival function for specific covariate patterns.
|
||||
Based on the previous results choose the best predictors.
|
||||
|
||||
Exercise 9 — Check the proportional hazards assumption. You may use the
|
||||
function `cox.zph`. Discuss the result and possible implications.

Exercise 10 — Plot the Schoenfeld residuals and comment.
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue