git init

2026-05-09 18:49:31 +02:00 · 2026-05-09 18:49:31 +02:00 · 7b94e4bf64
commit 7b94e4bf64
5 changed files with 10913 additions and 0 deletions
--- a/report.Rmd
+++ b/report.Rmd
@ -0,0 +1,244 @@
+---
+title: foo
+execute:
+  cache: true
+  freeze: auto
+number-sections: true
+---
+
+```{r}
+library(tidyverse)
+library(survival)
+# library(gtsummary)
+```
+
+```{r}
+# Load the tab-separated transplant data and preview the first rows.
+dat <- read.delim("./unos.txt")
+head(dat)
+```
+
+```{r}
+# Replace the raw header with short analysis names. The assignment is purely
+# positional, so check the column count first: with too few names the
+# remaining columns would silently end up with NA names.
+new.names <- c("hla.match", "age.donor", "age.rec", "cold.isc", "death",
+               "year", "sex", "tx.type", "follow.up")
+stopifnot(ncol(dat) == length(new.names))
+names(dat) <- new.names
+```
+
+
+
+# Exercise
+
+## Exercise 1 
+
+> Illustrate in a table the characteristics of the population (age, sex, race,
+> donor, ...).
+
+```{r}
+# Shared base plot object; the figures below each add their own geom.
+g <- ggplot(data = dat)
+```
+
+```{r}
+# Death indicator against follow-up time, then the follow-up distribution.
+g + geom_point(aes(x = follow.up, y = death))
+g + geom_density(aes(x = follow.up))
+
+# Age distributions of donors and recipients.
+g + geom_density(aes(x = age.donor))
+g + geom_bar(aes(x = age.rec))
+
+# Donor age by recipient age, one box per recipient age. The grouping has to
+# follow the x variable: grouping by age.donor (the y variable) gives every
+# group a single constant y value, so no box can be drawn.
+# NOTE(review): assumes "donor age per recipient age" was the intended view.
+g + geom_boxplot(aes(x = age.rec, y = age.donor, group = age.rec))
+
+# dat$age.1 |> table()
+```
+
+## Exercise 2
+
+Plot the Kaplan-Meier overall survival curve for pediatric kidney
+transplant recipients for the first 12 years after transplantation.
+
+```{r}
+# Fit on the full cohort. Filtering rows on follow.up <= 12 selects on the
+# outcome itself and would drop exactly the longest-observed subjects, which
+# biases the curve; the 12-year window is applied to the plot axis instead.
+km <- survfit(Surv(follow.up, death) ~ 1, data = dat)
+plot(km, xlim = c(0, 12))
+
+
+
+```
+
+## Exercise 3
+
+We are going to compare mortality rates (hazard functions)
+between children whose transplanted kidney was provided by a living donor
+(in general a family member) and those whose source was recently deceased
+(variable donor type: `txtype`). Use the life table method to calculate the death
+rates for the first 5 years for each group (take in the first year intervals of 4
+months and then look at each year) and show the results in a table. Estimate
+the hazard ratio in each time interval as the ratio between the mortality rates
+in the two groups. What do you notice?
+
+```{r}
+# Restrict the analysis to the first 5 years by censoring at year 5 rather
+# than dropping rows. Subjects followed beyond 5 years were under observation
+# for the whole window, so they must remain in the risk set; removing them
+# shrinks the denominators of the per-group life tables built from dat.5.
+dat.5 <- dat
+beyond.window <- which(dat.5$follow.up > 5)
+dat.5$death[beyond.window] <- 0
+dat.5$follow.up[beyond.window] <- 5
+head(dat.5)
+# Map follow-up times to the upper bound of their life-table interval.
+#
+# fu: numeric vector of follow-up times.
+# Returns a numeric vector of the same length: 1/3, 2/3 or 1 for times up to
+# and including those bounds, otherwise the time rounded up to the next whole
+# number. NA input yields NA.
+#
+# Vectorised with ifelse(): the previous scalar `if` chain raised an error on
+# NA and on inputs longer than one element.
+classify_time_interval <- function(fu) {
+  ifelse(
+    fu <= 1/3, 1/3,
+    ifelse(
+      fu <= 2/3, 2/3,
+      ifelse(fu <= 1, 1, ceiling(fu))
+    )
+  )
+}
+
+# Label every subject with its life-table interval. vapply() pins the result
+# to a numeric vector; sapply() would return a list for empty input.
+dat.5$fu.interval <- vapply(dat.5$follow.up, classify_time_interval, numeric(1))
+table(dat.5$fu.interval)
+head(dat.5)
+```
+
+```{r}
+# Life table for the whole cohort, one row per follow-up interval.
+# Every row of `dat` counts as at risk at time 0; each later interval starts
+# with whoever is left after all earlier events and censorings.
+dat.5.life <- dat.5 |>
+  group_by(fu.interval) |>
+  summarize(
+    n.censored = sum(death == 0),
+    n.event = sum(death)
+  ) |>
+  mutate(
+    # Exits accumulated *before* each interval. This replaces the 2:nrow()
+    # loop, which iterates over c(2, 1) when the table has a single row.
+    n.at.risk = nrow(dat) -
+      dplyr::lag(cumsum(n.event + n.censored), default = 0)
+  )
+
+print(nrow(dat))
+dat.5.life
+
+```
+
+```{r}
+# Events in each interval divided by the subjects at risk at its start.
+dat.5.life$hazard.rate <- dat.5.life$n.event / dat.5.life$n.at.risk
+
+```
+
+---
+
+
+```{r}
+# Build a life table from follow-up data that already carries `fu.interval`.
+#
+# dat: data frame with columns `fu.interval` and `death`. `death` is summed
+#      to count events, so it is treated as a 0/1 indicator.
+# Returns one row per interval with n.censored, n.event, n.at.risk (subjects
+# still under observation when the interval starts) and
+# hazard.rate = n.event / n.at.risk.
+get_life_table <- function(dat) {
+  # Every row passed in counts as at risk at time 0.
+  n.start <- nrow(dat)
+
+  dat |>
+    group_by(fu.interval) |>
+    summarize(
+      n.censored = sum(death == 0),
+      n.event = sum(death)
+    ) |>
+    mutate(
+      # Subtract everyone who left (event or censoring) in earlier intervals.
+      # Vectorised replacement for the 2:nrow() loop, which iterates over
+      # c(2, 1) when the table has a single row.
+      n.at.risk = n.start -
+        dplyr::lag(cumsum(n.event + n.censored), default = 0),
+      hazard.rate = n.event / n.at.risk
+    )
+}
+```
+
+```{r}
+# Split the 5-year data by donor type code.
+dat.5.tx0 <- dat.5[dat.5$tx.type == 0, ]
+dat.5.tx1 <- dat.5[dat.5$tx.type == 1, ]
+```
+
+```{r}
+# Life table for tx.type == 0; the outer parentheses print the result.
+(tx0.life <- get_life_table(dat.5.tx0))
+```
+
+```{r}
+# Life table for tx.type == 1; the outer parentheses print the result.
+(tx1.life <- get_life_table(dat.5.tx1))
+```
+
+
+```{r}
+# Interval-wise ratio of the two hazard rates (tx.type 1 over tx.type 0).
+tx1.life[["hazard.rate"]] / tx0.life[["hazard.rate"]]
+```
+
+```{r}
+# Combine both life tables into one table of rates and their ratio
+# (tx.type 1 over tx.type 0). Joining on the interval keeps the groups
+# aligned even if one of them has no rows for some interval; dividing the two
+# columns by position would mispair or recycle values in that case.
+hazard.df <- inner_join(
+  select(tx0.life, fu.interval, hazard.rate.0 = hazard.rate),
+  select(tx1.life, fu.interval, hazard.rate.1 = hazard.rate),
+  by = "fu.interval"
+) |>
+  mutate(hazard.ratio = hazard.rate.1 / hazard.rate.0)
+
+# Rates per group: blue = tx.type 0, orange = tx.type 1.
+ggplot(hazard.df, aes(x = fu.interval)) +
+  geom_line(aes(y = hazard.rate.0), color = "blue") +
+  geom_line(aes(y = hazard.rate.1), color = "orange")
+
+# Hazard ratio over the follow-up intervals.
+ggplot(hazard.df, aes(x = fu.interval)) +
+  geom_line(aes(y = hazard.ratio))
+
+```
+
+
+
+## Exercise 4
+
+Show a plot with Kaplan-Meier survival curves for the two donor types.
+
+```{r}
+# Fit on the full cohort. Filtering rows on follow.up <= 12 selects on the
+# outcome and drops the longest-observed subjects; the 12-year window is
+# applied to the plot axis instead.
+km.tx <- survfit(Surv(follow.up, death) ~ tx.type, data = dat)
+
+# Curves are drawn in stratum order (tx.type 0, then 1).
+# NOTE(review): the legend labels assume 0 = cadaveric and 1 = living; confirm
+# against the data dictionary.
+plot(km.tx, col = c("blue", "orange"), xlim = c(0, 12))
+legend("bottomleft", legend = c("cadaveric", "living"), lwd = 2, col = c("blue", "orange"))
+```
+
+## Exercise 5
+
+Fit a univariate Cox model with predictor donor type. Report
+the hazard ratio and 95% confidence interval and interpret the result obtained.
+
+```{r}
+# Univariate Cox proportional hazards model with donor type as the only
+# predictor; summary() reports the coefficient, exp(coef) and its 95% CI.
+cox <- coxph(
+  formula = Surv(follow.up, death) ~ tx.type,
+  data = dat
+)
+summary(cox)
+```
+
+```{r}
+# Hazard ratio for tx.type, read from the fitted model instead of being
+# copied by hand from the summary() output.
+exp(coef(cox))
+```
+
+```{r}
+# 95% Wald confidence interval for the hazard ratio. The interval is built on
+# the log scale (coef +/- 1.96 * se) and then exponentiated; adding
+# 1.96 * exp(se) to the hazard ratio itself is not a valid interval. The old
+# second line also parsed as (2.298 - 1.58) / exp(2) / 2, because `|>` binds
+# tighter than `/`.
+exp(confint(cox))
+```
+
+Exercise 6 — Research shows that an important determinant of mortality
+after kidney transplant is the age of the recipient. Fit a Cox model with age
+as predictor and estimate the hazard ratio and its confidence interval. Consider
+age first as continuous variable and then divide into categories.
+
+Exercise 7 — Fit a multivariate Cox model by using other predictors and
+describe your results.
+
+Exercise 8 — Estimate the survival function for specific covariate patterns.
+Based on the previous results choose the best predictors.
+
+Exercise 9 — Check the proportional hazards assumption. You may use the
+function `cox.zph`. Discuss the result and possible implications.
+
+Exercise 10 — Plot the Schoenfeld residuals and comment.
+