diff --git a/report.Rmd b/report.Rmd
index f5f6edd..f51fe1b 100644
--- a/report.Rmd
+++ b/report.Rmd
@@ -546,23 +546,20 @@ recode.age <- function(dat, ages) {
 dat.age.rec <- dat[!is.na(dat$age.rec), ]
 class.age <- function(age) {
   if (age < 2) { 
-    return("0,1")
+    return("0-1")
   } else if (age < 6) {
-    return("2,3,4,5")
+    return("2-5")
   } else if (age < 11) {
-    return("6,7,8,9,10")
+    return("6-10")
   } else if (age < 19) {
     return("11-18")
   }
 }
-```
-
-```{r}
 dat.age.rec$age.group <- sapply(dat.age.rec$age.rec, class.age) |>
   factor(levels = c(
-    "0,1",
-    "2,3,4,5",
-    "6,7,8,9,10",
+    "0-1",
+    "2-5",
+    "6-10",
     "11-18"
   ))
 
@@ -574,9 +571,6 @@ cox.age.rec <- coxph(Surv(follow.up, death) ~ age.group, data = dat.age.rec)
 emmeans(cox.age.rec, pairwise ~ age.group, type = "response")
 ```
 
-```{r}
-cox.age.rec |> ggsurvfit()
-```
 
 
 # Exercise 7
@@ -705,6 +699,7 @@ H1: PH-assumption doesn't hold
 # different cox model!!
 cox.final <- coxph(Surv(follow.up, death) ~ tx.type + age.rec + hla.match + age.donor, data = dat)
 cox.zph(cox.final)
+cox.final
 ```
 
 
@@ -715,6 +710,11 @@ A non-random pattern or slope in a plot of scaled residuals against time indicat
 plot(cox.zph(cox.final))
 ```
 
+```{r, fig.width=10, fig.height=10}
+plot(cox.zph(cox.final))
+```
+
+
 
 
 NOTES:
diff --git a/slides.Rmd b/slides.Rmd
index 871e462..024b4b4 100644
--- a/slides.Rmd
+++ b/slides.Rmd
@@ -5,6 +5,7 @@ execute:
   freeze: auto
   include: true
   echo: false
+  warning: false
 number-sections: true
 ---
 
@@ -18,8 +19,12 @@ library(foreign)
 library(gtsummary)
 library(gt)
 library(ggsurvfit)
+library(gridExtra)
+library(survminer)
+library(viridis)
 ```
 
+
 ```{r}
 dat <- read.csv("./unos.txt", sep = "\t")
 names(dat) <- c("hla.match", "age.donor", "age.rec", "cold.isc", "death",
@@ -33,306 +38,565 @@ dat <- dat |>
   )
 ```
 
-# Introduction: research question
-
-## Survival after transplantation (M)
-
-## Identify Predictors (M)
-
-## Why Survival Analysis (L)
-
-Motivation: study the distribution of time to event $T$.
-
-Example: time of death after kidney transplant. 
-
-
 ```{r}
-ex <- data.frame(
-  id = c(1, 2, 3, 4, 5),
-  transplant = c(2000, 2000, 2001, 2003, 2004),
-  death = c(2005, 2009, 2005, 2004, 2016)
-)
-
-ex_table_1 <- ex |>
-  gt() |>
-  cols_label(
-    id = "ID",
-    transplant = "Transplant",
-    death = "Death"
-  ) %>%
-  cols_align(align = "center", columns = everything())
-
-ex_plot_real_time <- ggplot(ex) +
-  geom_segment(aes(x = transplant, xend = death, y = factor(id),
-                   yend = factor(id)), color = "grey50", linewidth = 1) +
-  geom_point(aes(x = transplant, y = factor(id), color = "trans"), size = 3) +
-  geom_point(aes(x = death, y = factor(id), color = "death"), size = 3) +
-  scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D")) +
-  scale_y_discrete(limits = rev) +
-  labs(x = "Year", y = "Subject ID", color = "Event") +
-  theme_minimal()
-```
-
-```{r}
-ex_table_1
-ex_plot_real_time
-```
-
----
-
-We then calculate time to death for each respondents. 
-
-With this, all we need is to fit a linear model `t ~ X` or `log(t) ~ X`
-
-```{r}
-ex_t <- ex |>
-  mutate(
-    t = death - transplant
-  )
-
-ex_table_2 <- ex_t |>
-  gt() |>
-  cols_label(
-    id = "ID",
-    transplant = "Transplant",
-    death = "Death",
-    t = "Time to death"
-  ) |>
-  cols_align(align = "center", columns = everything())
-
-ex_plot_uniform_time <- ggplot(ex_t) +
-  geom_segment(aes(x = 0, xend = t, y = factor(id),
-                   yend = factor(id)), color = "grey50", linewidth = 1) +
-  geom_point(aes(x = 0, y = factor(id), color = "trans"), size = 3) +
-  geom_point(aes(x = t, y = factor(id), color = "death"), size = 3) +
-  scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D")) +
-  scale_y_discrete(limits = rev) +
-  labs(x = "Year", y = "Subject ID", color = "Event") +
-  theme_minimal()
-
-```
-
-
-```{r}
-ex_table_2
-ex_plot_uniform_time
-```
-
----
-
-What if we do not know when exactly some respondents die?
-
-Scenario 1: the study ends at the year 2008?
-
-```{r}
-ex_t_c <- ex_t |>
-  mutate(
-    death_censored = if_else(death <= 2008, death, 2008),
-    death_censored_txt = if_else(death <= 2008, as.character(death), "> 2008"),
-    status = if_else(death <= 2008, 1, 0),
-    t_1 = death_censored - transplant,
-    t_1_txt = if_else(death <= 2008, as.character(t_1),
-                      paste(">", as.character(t_1)))
+my.theme <- theme_minimal() +
+  theme(
+    legend.position = "bottom",
+    plot.title = element_text(size = 17, face = "bold"),
+    axis.title = element_text(size = 15),
+    axis.text = element_text(size = 12),
+    legend.text = element_text(size = 12),
+    plot.margin = margin(20, 30, 20, 30)
   )
 ```
 
-```{r}
-ex_table_3 <- ex_t_c |>
-  select(id, transplant, death_censored_txt, t_1_txt) |>
-  gt() |>
-  cols_label(
-    id = "ID",
-    transplant = "Transplant",
-    death_censored_txt = "Death",
-    t_1_txt = "Time to death"
-  ) |>
-  cols_align(align = "center", columns = everything())
+::: {.callout-warning}
+There are 25 repondents with `follow.up = 0`, 21 of which with `death = 0` and
+other 4 with `death = 1`.
 
-ex_plot_real_time_1 <- ggplot(ex_t_c) +
-  geom_segment(aes(x = transplant, xend = death_censored, y = factor(id),
-                   yend = factor(id)), color = "grey50", linewidth = 1) +
-  geom_point(aes(x = transplant, y = factor(id), color = "trans"), size = 3) +
-  geom_point(aes(x = death_censored, y = factor(id), color =
-                   if_else(status == 1, "death", "censored")), size = 3) +
-  scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D",
-                                "censored" = "orange")) +
-  scale_y_discrete(limits = rev) +
-  labs(x = "Year", y = "Subject ID", color = "Event") +
-  xlim(2000, 2016) +
-  geom_vline(xintercept = 2008, linetype = "dashed", color = "orange",
-             linewidth = 0.8) +
-  theme_minimal()
-
-
-ex_plot_uniform_time_1 <- ggplot(ex_t_c) +
-  geom_segment(aes(x = 0, xend = t_1, y = factor(id),
-                   yend = factor(id)), color = "grey50", linewidth = 1) +
-  geom_point(aes(x = 0, y = factor(id), color = "trans"), size = 3) +
-  geom_point(aes(x = t_1, y = factor(id), color =
-                   if_else(status == 1, "death", "censored")), size = 3) +
-  scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D",
-                                "censored" = "orange")) +
-  scale_y_discrete(limits = rev) +
-  labs(x = "Year", y = "Subject ID", color = "Event") +
-  theme_minimal()
+```{r, echo=TRUE}
+dat[dat$follow.up == 0, ] |>
+  group_by(death) |>
+  summarise(n = n())
 ```
 
+Reasons to omit them, that is, have `dat <- dat[dat$follow.up > 0,]` at the 
+very beginning:
 
-```{r}
-ex_table_3
-ex_plot_real_time_1
-ex_plot_uniform_time_1
-```
+- If `death = 1`, transplant failed. They shouldn't be included in the research
+  question "survival rate of a *successful* tranplant".
+- If `death = 0`, transplant was successful, but they were immediately removed
+  from populatoin at risk, contributiong nothing to the analysis.
+:::
 
-**Right Censoring**: only observe the event (death) if it occurs before a 
-certain time (2008).
+# Title 
 
----
+# Table of Contents (?)
 
-Scenario 2: respondent 3 move away; loss follow up
-
-```{r}
-ex_alt <- ex_t_c
-ex_alt$death_censored_txt[3] <- "> 2005"
-ex_alt$status[3] <- 0
-ex_alt$t_1_txt[3] <- "> 4"
-
-
-ex_table_4 <- ex_alt |>
-  select(id, transplant, death_censored_txt, t_1_txt) |>
-  gt() |>
-  cols_label(
-    id = "ID",
-    transplant = "Transplant",
-    death_censored_txt = "Death",
-    t_1_txt = "Time to death"
-  ) |>
-  cols_align(align = "center", columns = everything())
-
-ex_plot_real_time_2 <- ggplot(ex_alt) +
-  geom_segment(aes(x = transplant, xend = death_censored, y = factor(id),
-                   yend = factor(id)), color = "grey50", linewidth = 1) +
-  geom_point(aes(x = transplant, y = factor(id), color = "trans"), size = 3) +
-  geom_point(aes(x = death_censored, y = factor(id), color =
-                   if_else(status == 1, "death", "censored")), size = 3) +
-  scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D",
-                                "censored" = "orange")) +
-  scale_y_discrete(limits = rev) +
-  labs(x = "Year", y = "Subject ID", color = "Event") +
-  xlim(2000, 2016) +
-  geom_vline(xintercept = 2008, linetype = "dashed", color = "orange",
-             linewidth = 0.8) +
-  theme_minimal()
-
-
-ex_plot_uniform_time_2 <- ggplot(ex_alt) +
-  geom_segment(aes(x = 0, xend = t_1, y = factor(id),
-                   yend = factor(id)), color = "grey50", linewidth = 1) +
-  geom_point(aes(x = 0, y = factor(id), color = "trans"), size = 3) +
-  geom_point(aes(x = t_1, y = factor(id), color =
-                   if_else(status == 1, "death", "censored")), size = 3) +
-  scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D",
-                                "censored" = "orange")) +
-  scale_y_discrete(limits = rev) +
-  labs(x = "Year", y = "Subject ID", color = "Event") +
-  theme_minimal()
-```
-
-```{r}
-ex_table_4
-ex_plot_real_time_2
-ex_plot_uniform_time_2
-```
-
----
-
-How many patients are right censored?
-
-```{r}
-dat |>
-  mutate(Overall = "Overall") |>
-  pivot_longer(
-    cols = c(Overall, sex, tx.type), 
-    names_to = "Attribute", 
-    values_to = "Category"
-  ) |>
-  count(Attribute, Category, death) |>
-  ggplot(aes(x = Category, y = n, fill = factor(death))) +
-  geom_col(position = "dodge") +
-  facet_wrap(~ Attribute, scales = "free_x") +
-  labs(x = "Group", y = "Count", fill = "Death Status") +
-  theme_minimal()
-```
-
-## Challenges in Survival Analysis (L)
-
-- **Right Censoring**: only observe the event if it occurs before a certain 
-  time.
-- **Left Censoring**: event has occurred prior to the start of a research
-  - Follow up every 3 years.
-  - Event has occurred -> event happened sometime before follow up.
-- **Left Truncation**: delayed entry; respondents are included only if they
-  survived long enough.
-  - Start follow up with patients 100 days after the transplant
-  - Patients dies within 100 day wouldn't be included in the dataset
-- **Right Truncation**: respondents are included only if they have already
-  experienced the event.
-  - Retrospective analysis from deceased patients between 1990 and 2000.
-  - Patients not dead before 2000 are not included in the dataset.
+# Introduction (M)
 
 # Method & Result
 
-## Explain Dataset (M)
+## Identify Predictors (M)
 
-::: {.callout-note}
-# Things to highlight in figures
+- Data source
+- Study population
+```{r}
+nrow(dat)
+```
+- Covariates
+- Outcome
+```{r}
+names(dat)
+```
 
-- sex and recipient age are similar across `tx.type`
+## Table (L)
 
-- Living donors have less `hla.match` then cadaveric donors
+```{r}
+dat |>
+  select(sex, age.rec, age.donor, hla.match, cold.isc, tx.type) |> 
+  tbl_summary(
+    by = tx.type,
+    statistic = list(
+      all_continuous() ~ "{median} ({p25}, {p75})"
+    ),
+    label = list(
+      hla.match ~ "HLA matches, n(%)",
+      age.donor ~ "Donor age, median (IQR)",
+      age.rec ~ "Recipient age, median (IQR)",
+      cold.isc ~ "Cold ischemic time (hours), median (IQR), ",
+      sex ~ "Sex, n(%)"
+    ),
+    missing = "ifany"
+  ) |>
+  add_overall() |>
+  modify_footnote(all_stat_cols() ~ NA)
+```
 
-- There is age difference between different `tx.type`
-:::
+# Elementary Analysis: Transplant Type (L)
 
+## Transplant Type vs Sex
 
-###  `sex` ~ `tx.tpye`
-
-::: {.callout-note}
 `sex` are similar across `tx.type`
-:::
 
 ```{r, fig.width=7, fig.height=5}
-dat |>
-  count(tx.type, sex) |>
-  group_by(tx.type) |>
+plot.tx.sex <- dat |>
+  mutate(Overall = "Overall") |>
+  pivot_longer(
+    cols = c(tx.type, Overall),
+    names_to = "Attribute",
+    values_to = "Category"
+  ) |>
+  mutate(
+    Attribute = recode(
+      Attribute,
+      "tx.type" = "Transplant Type"
+    )
+  ) |>
+  count(Attribute, Category, sex) |>
+  group_by(Attribute, Category) |>
   mutate(
     total = sum(n),
     percent = n / total
   ) |>
   ungroup() |>
-  ggplot() +
-  geom_col(aes(x = tx.type, y = percent, fill = sex), position = "dodge") +
-  labs(
-    title = "Percentage of Sex in Transplant Type",
-    y = "Percentage",
-    x = "Transplant Type",
-    fill = NULL
-
-  ) + 
-  scale_x_discrete(
-    labels = c("Cadaveric" = "Deceased"),
+  ggplot(aes(x = Category, y = percent, fill = sex)) +
+  geom_col(position = "dodge") +
+  facet_grid(
+    ~ Attribute,
+    scales = "free_x",
+    space = "free_x"
   ) +
-  theme_minimal() +
-  theme(
-    legend.position = "bottom",
-    plot.title = element_text(size = 17, face = "bold"),
-    axis.title = element_text(size = 15),
-    axis.text = element_text(size = 10),
-    legend.text = element_text(size = 12),
-    plot.margin = margin(20, 30, 20, 30)
+  labs(
+    title = "Percentage of Sex by Transplant Type",
+    x = "Group",
+    y = "Percentage",
+    fill = NULL
+  ) +
+  scale_x_discrete(
+    labels = c("Cadaveric" = "Deceased")
+  ) +
+  my.theme
+plot.tx.sex
+```
+
+## Transplant Type vs Recipient Age
+
+```{r, fig.width=7, fig.height=5}
+plot.tx.rec <- ggplot(data = dat) +
+  geom_density(aes(x = age.rec, color = tx.type)) +
+  geom_density(
+    aes(x = age.rec, color = "Overall"),
+    linewidth = 0.5,
+    linetype = "dashed"
+  ) +
+  scale_color_manual(
+    values = c(
+      "Cadaveric" = "darkorange",
+      "Living" = "darkgreen",
+      "Overall" = "gray50"
+    ),
+    labels = c(
+      "Cadaveric" = "Deceased"
+    )
+  ) +
+  labs(
+    title = "Density of Recipient Age by Transplant Type",
+    x = "Recipient Age",
+    y = "Density",
+    color = NULL
+  ) +
+  my.theme
+plot.tx.rec
+```
+
+
+# Elementary Analysis: Transplant Type (Cont.)
+
+## Transplant Type vs HLA Match
+
+Living donors have less `hla.match` then cadaveric donors.
+
+```{r, fig.width=7, fig.height=5}
+ggplot(data = dat) +
+  geom_bar(aes(x = hla.match)) +
+  facet_wrap(tx.type ~ ., ncol = 1) +
+  labs(
+    title = "Distribution of HLA Match by Transplant Type",
+    x = "HLA match",
+    y = "Count",
+    color = NULL
+  ) +
+  my.theme
+
+```
+
+## Transplant Type vs Donor Age
+
+```{r, fig.width=7, fig.height=5}
+plot.tx.donor <- ggplot(data = dat) +
+  geom_density(aes(x = age.donor, color = tx.type)) +
+  geom_density(
+    aes(x = age.donor, color = "Overall"),
+    linewidth = 0.5,
+    linetype = "dashed"
+  ) +
+  scale_color_manual(
+    values = c(
+      "Cadaveric" = "darkorange",
+      "Living" = "darkgreen",
+      "Overall" = "gray50"
+    ),
+    labels = c(
+      "Cadaveric" = "Deceased"
+    )
+  ) +
+  labs(
+    title = "Density of Donor Age by Transplant Type",
+    x = "Recipient Age",
+    y = "Density",
+    color = NULL
+  ) +
+  my.theme
+
+plot.tx.donor
+```
+
+# Overall Kaplan-Meier Curve
+
+```{r}
+km_all <- survfit(Surv(follow.up, death) ~ 1, data = dat)
+```
+
+```{r, fig.width=7, fig.height=5}
+km_all |>
+  ggsurvfit(type = "survival") +
+  add_confidence_interval() +
+  scale_y_continuous(limits = c(0, 1)) +
+  labs(
+    title = "Kaplan-Meier Curve for Overall Survival",
+    x = "Years of Follow-up",
+    y = "Survival Probability",
+  ) +
+  my.theme
+```
+
+```{r}
+summary(km_all, times = c(0, 4, 8, 12))
+```
+
+# Cox Model (with intervals?)
+
+# Cox Model: Transplant Type
+
+```{r}
+cox.tx <- coxph(Surv(follow.up, death) ~ tx.type, data = dat)
+summary(cox.tx)
+```
+
+```{r}
+km.tx <- survfit(Surv(follow.up, death) ~ tx.type, data = dat)
+```
+
+```{r, fig.width=7, fig.height=5}
+km.tx |>
+  ggsurvfit(type = "survival") +
+  add_confidence_interval() +
+  scale_color_discrete(
+    labels = c("Deceased", "Living")
+  ) +
+  scale_fill_discrete(
+    labels = c("Deceased", "Living")
+  ) +
+  labs(
+    title = "Survival by Donor Type",
+    x = "Years of Follow-up",
+    y = "Survival Probability",
+    color = "Donor Type",
+    fill = "Donor Type"
+  ) +
+  my.theme
+```
+
+```{r}
+summary(km.tx, times = c(0, 4, 8, 12))
+```
+
+# Cox Model: Age Recipient (Continuous)
+
+```{r}
+cox.age.rec <- coxph(Surv(follow.up, death) ~ age.rec, data = dat)
+summary(cox.age.rec)
+```
+
+```{r}
+km.age.rec <- survfit(Surv(follow.up, death) ~ age.rec, data = dat)
+```
+
+```{r, fig.width=7, fig.height=7}
+km.age.rec |>
+  ggsurvfit(type = "survival") +
+  scale_color_discrete(
+    labels = seq(1, 18)
+  ) +
+  labs(
+    title = "Survival by Recipient Age",
+    x = "Years of Follow-up",
+    y = "Survival Probability",
+    color = "Recipient Age",
+  ) +
+  my.theme
+```
+
+<details>
+<summary>View life table</summary>
+
+```{r}
+summary(km.age.rec, times = c(0, 4, 8, 12))
+```
+</details>
+
+
+# Cox Model: Age Recipient (Categorical)
+
+```{r}
+class.age <- function(age) {
+  if (is.na(age)) return(NA)
+  if (age < 2) { 
+    return("0-1")
+  } else if (age < 6) {
+    return("2-5")
+  } else if (age < 11) {
+    return("6-10")
+  } else if (age < 19) {
+    return("11-18")
+  }
+}
+
+dat.age.rec.group <- dat
+dat.age.rec.group$age.rec.group = sapply(dat$age.rec, class.age) |>
+  factor(levels = c(
+    "0-1",
+    "2-5",
+    "6-10",
+    "11-18"
+  ))
+cox.age.rec.group <- coxph(Surv(follow.up, death) ~ age.rec.group,
+                           data = dat.age.rec.group)
+```
+
+
+```{r, echo=TRUE}
+cox.age.rec.group
+```
+
+</details>
+
+
+```{r, echo=TRUE}
+eme.age.rec.group <- emmeans(cox.age.rec.group, pairwise ~ age.rec.group,
+                             type = "response")
+eme.age.rec.group
+```
+
+
+```{r}
+km.age.rec.group <- survfit(Surv(follow.up, death) ~ age.rec.group, 
+                            data = dat.age.rec.group)
+```
+
+```{r, fig.width=7, fig.height=7}
+km.age.rec.group |>
+  ggsurvfit(type = "survival") +
+  add_confidence_interval() +
+  scale_color_discrete(
+    labels = c("0-1", "2-5", "6-10", "11-18")
+  ) +
+  scale_fill_discrete(
+    labels = c("0-1", "2-5", "6-10", "11-18")
+  ) +
+  labs(
+    title = "Survival by Recipient Age Group",
+    x = "Years of Follow-up",
+    y = "Survival Probability",
+    color = "Recipient Age Group",
+    fill = "Recipient Age Group",
+  ) +
+  my.theme
+```
+
+```{r, fig.width=7, fig.height=7}
+eme.age.rec.group$contrast |> 
+  as.data.frame() |>
+  select(contrast, ratio, SE, p.value) |>
+  mutate(
+    contrast = factor(contrast),
+    log.ratio = log(ratio),
+    SE.log = SE / ratio,
+    low = exp(log.ratio - 1.96 * SE.log),
+    up = exp(log.ratio + 1.96 * SE.log)
+  ) |>
+  ggplot(aes(y = reorder(contrast, ratio))) +
+  geom_errorbarh(aes(xmin = low, xmax = up)) +
+  geom_point(aes(x = ratio)) +
+  geom_vline(xintercept =  1, linetype = "dashed") +
+  scale_y_discrete(
+    labels = function(x) {
+      gsub("[()]", "", x = x)
+    }
+  ) +
+  labs(
+    x = "Hazard Ratio",
+    y = "Recipient Age Group Comparison",
+    title = "Pairwise Hazard Ratios by Recipient Age Groups"
+  ) +
+  my.theme
+```
+
+# Identifying Predictors
+
+TODO: write a more generic function
+
+```{r}
+Waldtest <- function(cox.fit, i.betas){
+  q <- length(i.betas)
+  coefs <- cox.fit$coefficients
+  var <- cox.fit$var[i.betas, i.betas]
+  Wald <- coefs[i.betas]%*%solve(var)%*%coefs[i.betas]
+  p.value <- 1-pchisq(as.numeric(Wald), df = q)
+  print(p.value)
+  return(p.value)
+}
+```
+
+```{r}
+vars <- c("hla.match", "age.donor", "age.rec", "cold.isc", "year", "sex", 
+          "tx.type")
+included <- vars == "tx.type"
+included <- c(FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, TRUE)
+included <- c(FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, TRUE)
+
+find.best.surv <- function(included) {
+  print('---')
+  test.vars <- vars[!included]
+  p.values <- sapply(test.vars, function(test.var) {
+    new.surv <- reformulate(
+      termlabels = c(vars[included], test.var),
+      response = "Surv(follow.up, death)"
+    )
+    fit.tx <- coxph(new.surv, data = dat)
+    Waldtest(fit.tx, sum(included))
+  })
+  if (sum(p.values > 0.05) == sum(included)) {
+    print(3)
+    return(included) 
+  }
+  if (sum(included == TRUE) == length(included)) {
+    print(2)
+    return(included)
+  }
+  print(1)
+  print(which.min(p.values))
+  print(included)
+  print(p.values)
+  print(which.min(p.values))
+  included[which.min(p.values)] <- TRUE
+  print(included)
+  # find.best.surv(included)
+}
+
+
+find.best.surv(included)
+
+```
+
+```{r}
+surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match
+fit.cox.model(surv, 3)
+surv <- Surv(follow.up, death) ~ tx.type + age.rec + age.donor
+fit.cox.model(surv, 3)
+surv <- Surv(follow.up, death) ~ tx.type + age.rec + cold.isc
+fit.cox.model(surv, 3)
+surv <- Surv(follow.up, death) ~ tx.type + age.rec + year
+fit.cox.model(surv, 3)
+surv <- Surv(follow.up, death) ~ tx.type + age.rec + sex
+fit.cox.model(surv, 3)
+```
+
+
+```
+
+---
+
+
+# Old
+
+<details>
+
+## Death & Censored
+
+
+```{r, echo=TRUE}
+dat |>
+  group_by(death) |>
+  summarise(
+    n = n(),
+    percentage = n() / nrow(dat) * 100
   )
+```
+
+Most of the patients (95.24%) were censored.
+
+```{r}
+n.pop <- nrow(dat)
+dat.accum <- dat[dat$follow.up > 0,] |>
+  select(follow.up, death) |>
+  mutate(censored = ifelse(death == 1, 0, 1)) |>
+  group_by(follow.up) |>
+  summarize(
+    death = sum(death),
+    censored = sum(censored),
+    .groups = "drop"
+  ) |>
+  arrange(follow.up) |>
+  mutate(
+    death.accum = cumsum(death),
+    censored.accum = cumsum(censored),
+    event.accum = death.accum + censored.accum,
+    n.at.risk = n.pop - c(0, event.accum[-n()])
+  )
+dat.accum |>
+  pivot_longer(
+    cols = c(death.accum, censored.accum, n.at.risk),
+    names_to = "type",
+    values_to = "accum",
+  ) |>
+  ggplot(aes(x = follow.up, y = accum, color = type)) +
+  geom_step(linewidth = 1, direction = "hv") +
+  my.theme
+```
+
+```{r}
+dat[dat$death == 1,] |>
+  ggplot(aes(x = follow.up)) +
+  geom_histogram(bins = 50)
+```
 
 
+```{r}
+dat[dat$follow.up == 0,]
+```
+
+```{r}
+head(dat)
+```
+
+## Cox Model - Age as a Continuous Variable
+
+```{r, echo=TRUE}
+cox.age.rec <- coxph(Surv(follow.up, death) ~ age.rec, data = dat)
+cox.age.rec |> summary()
+```
+
+## Cox Model - Age as a Categorical Variable
+
+
+```{r, fig.width=10, fig.height=10}
+cox.final <- coxph(Surv(follow.up, death) ~ tx.type + age.rec + hla.match + age.donor + year, data = dat)
+cox.zph(cox.final)
+plot(cox.zph(cox.final))
+
+ggcoxzph(cox.zph(cox.final),
+         ggtheme = theme_minimal() +
+         theme(
+           legend.position = "bottom",
+           plot.title = element_text(size = 17, face = "bold"),
+           axis.title = element_text(size = 15),
+           axis.text = element_text(size = 12),
+           legend.text = element_text(size = 12),
+           strip.text = element_blank(),
+           plot.margin = margin(20, 30, 20, 30)
+         ))
 ```
 
 ## Table 1 (L)
@@ -724,5 +988,246 @@ anova(m1, m2, test = "LRT")
 
 ## Assumption Testing
 
-# Conclusion (M + L)
+
+  ## Why Survival Analysis (L)
+
+Motivation: study the distribution of time to event $T$.
+
+Example: time of death after kidney transplant. 
+
+
+```{r}
+ex <- data.frame(
+  id = c(1, 2, 3, 4, 5),
+  transplant = c(2000, 2000, 2001, 2003, 2004),
+  death = c(2005, 2009, 2005, 2004, 2016)
+)
+
+ex_table_1 <- ex |>
+  gt() |>
+  cols_label(
+    id = "ID",
+    transplant = "Transplant",
+    death = "Death"
+  ) %>%
+  cols_align(align = "center", columns = everything())
+
+ex_plot_real_time <- ggplot(ex) +
+  geom_segment(aes(x = transplant, xend = death, y = factor(id),
+                   yend = factor(id)), color = "grey50", linewidth = 1) +
+  geom_point(aes(x = transplant, y = factor(id), color = "trans"), size = 3) +
+  geom_point(aes(x = death, y = factor(id), color = "death"), size = 3) +
+  scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D")) +
+  scale_y_discrete(limits = rev) +
+  labs(x = "Year", y = "Subject ID", color = "Event") +
+  theme_minimal()
+```
+
+```{r}
+ex_table_1
+ex_plot_real_time
+```
+
+---
+
+We then calculate time to death for each respondents. 
+
+With this, all we need is to fit a linear model `t ~ X` or `log(t) ~ X`
+
+```{r}
+ex_t <- ex |>
+  mutate(
+    t = death - transplant
+  )
+
+ex_table_2 <- ex_t |>
+  gt() |>
+  cols_label(
+    id = "ID",
+    transplant = "Transplant",
+    death = "Death",
+    t = "Time to death"
+  ) |>
+  cols_align(align = "center", columns = everything())
+
+ex_plot_uniform_time <- ggplot(ex_t) +
+  geom_segment(aes(x = 0, xend = t, y = factor(id),
+                   yend = factor(id)), color = "grey50", linewidth = 1) +
+  geom_point(aes(x = 0, y = factor(id), color = "trans"), size = 3) +
+  geom_point(aes(x = t, y = factor(id), color = "death"), size = 3) +
+  scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D")) +
+  scale_y_discrete(limits = rev) +
+  labs(x = "Year", y = "Subject ID", color = "Event") +
+  theme_minimal()
+
+```
+
+
+```{r}
+ex_table_2
+ex_plot_uniform_time
+```
+
+---
+
+What if we do not know when exactly some respondents die?
+
+Scenario 1: the study ends at the year 2008?
+
+```{r}
+ex_t_c <- ex_t |>
+  mutate(
+    death_censored = if_else(death <= 2008, death, 2008),
+    death_censored_txt = if_else(death <= 2008, as.character(death), "> 2008"),
+    status = if_else(death <= 2008, 1, 0),
+    t_1 = death_censored - transplant,
+    t_1_txt = if_else(death <= 2008, as.character(t_1),
+                      paste(">", as.character(t_1)))
+  )
+```
+
+```{r}
+ex_table_3 <- ex_t_c |>
+  select(id, transplant, death_censored_txt, t_1_txt) |>
+  gt() |>
+  cols_label(
+    id = "ID",
+    transplant = "Transplant",
+    death_censored_txt = "Death",
+    t_1_txt = "Time to death"
+  ) |>
+  cols_align(align = "center", columns = everything())
+
+ex_plot_real_time_1 <- ggplot(ex_t_c) +
+  geom_segment(aes(x = transplant, xend = death_censored, y = factor(id),
+                   yend = factor(id)), color = "grey50", linewidth = 1) +
+  geom_point(aes(x = transplant, y = factor(id), color = "trans"), size = 3) +
+  geom_point(aes(x = death_censored, y = factor(id), color =
+                   if_else(status == 1, "death", "censored")), size = 3) +
+  scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D",
+                                "censored" = "orange")) +
+  scale_y_discrete(limits = rev) +
+  labs(x = "Year", y = "Subject ID", color = "Event") +
+  xlim(2000, 2016) +
+  geom_vline(xintercept = 2008, linetype = "dashed", color = "orange",
+             linewidth = 0.8) +
+  theme_minimal()
+
+
+ex_plot_uniform_time_1 <- ggplot(ex_t_c) +
+  geom_segment(aes(x = 0, xend = t_1, y = factor(id),
+                   yend = factor(id)), color = "grey50", linewidth = 1) +
+  geom_point(aes(x = 0, y = factor(id), color = "trans"), size = 3) +
+  geom_point(aes(x = t_1, y = factor(id), color =
+                   if_else(status == 1, "death", "censored")), size = 3) +
+  scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D",
+                                "censored" = "orange")) +
+  scale_y_discrete(limits = rev) +
+  labs(x = "Year", y = "Subject ID", color = "Event") +
+  theme_minimal()
+```
+
+
+```{r}
+ex_table_3
+ex_plot_real_time_1
+ex_plot_uniform_time_1
+```
+
+**Right Censoring**: only observe the event (death) if it occurs before a 
+certain time (2008).
+
+---
+
+Scenario 2: respondent 3 move away; loss follow up
+
+```{r}
+ex_alt <- ex_t_c
+ex_alt$death_censored_txt[3] <- "> 2005"
+ex_alt$status[3] <- 0
+ex_alt$t_1_txt[3] <- "> 4"
+
+
+ex_table_4 <- ex_alt |>
+  select(id, transplant, death_censored_txt, t_1_txt) |>
+  gt() |>
+  cols_label(
+    id = "ID",
+    transplant = "Transplant",
+    death_censored_txt = "Death",
+    t_1_txt = "Time to death"
+  ) |>
+  cols_align(align = "center", columns = everything())
+
+ex_plot_real_time_2 <- ggplot(ex_alt) +
+  geom_segment(aes(x = transplant, xend = death_censored, y = factor(id),
+                   yend = factor(id)), color = "grey50", linewidth = 1) +
+  geom_point(aes(x = transplant, y = factor(id), color = "trans"), size = 3) +
+  geom_point(aes(x = death_censored, y = factor(id), color =
+                   if_else(status == 1, "death", "censored")), size = 3) +
+  scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D",
+                                "censored" = "orange")) +
+  scale_y_discrete(limits = rev) +
+  labs(x = "Year", y = "Subject ID", color = "Event") +
+  xlim(2000, 2016) +
+  geom_vline(xintercept = 2008, linetype = "dashed", color = "orange",
+             linewidth = 0.8) +
+  theme_minimal()
+
+
+ex_plot_uniform_time_2 <- ggplot(ex_alt) +
+  geom_segment(aes(x = 0, xend = t_1, y = factor(id),
+                   yend = factor(id)), color = "grey50", linewidth = 1) +
+  geom_point(aes(x = 0, y = factor(id), color = "trans"), size = 3) +
+  geom_point(aes(x = t_1, y = factor(id), color =
+                   if_else(status == 1, "death", "censored")), size = 3) +
+  scale_color_manual(values = c("trans" = "#00BFC4", "death" = "#F8766D",
+                                "censored" = "orange")) +
+  scale_y_discrete(limits = rev) +
+  labs(x = "Year", y = "Subject ID", color = "Event") +
+  theme_minimal()
+```
+
+```{r}
+ex_table_4
+ex_plot_real_time_2
+ex_plot_uniform_time_2
+```
+
+---
+
+How many patients are right censored?
+
+```{r}
+dat |>
+  mutate(Overall = "Overall") |>
+  pivot_longer(
+    cols = c(Overall, sex, tx.type), 
+    names_to = "Attribute", 
+    values_to = "Category"
+  ) |>
+  count(Attribute, Category, death) |>
+  ggplot(aes(x = Category, y = n, fill = factor(death))) +
+  geom_col(position = "dodge") +
+  facet_wrap(~ Attribute, scales = "free_x") +
+  labs(x = "Group", y = "Count", fill = "Death Status") +
+  theme_minimal()
+```
+
+## Challenges in Survival Analysis (L)
+
+- **Right Censoring**: only observe the event if it occurs before a certain 
+  time.
+- **Left Censoring**: event has occurred prior to the start of a research
+  - Follow up every 3 years.
+  - Event has occurred -> event happened sometime before follow up.
+- **Left Truncation**: delayed entry; respondents are included only if they
+  survived long enough.
+  - Start follow up with patients 100 days after the transplant
+  - Patients dies within 100 day wouldn't be included in the dataset
+- **Right Truncation**: respondents are included only if they have already
+  experienced the event.
+  - Retrospective analysis from deceased patients between 1990 and 2000.
+  - Patients not dead before 2000 are not included in the dataset.
+</details>