update

2026-05-20 16:54:59 +02:00 · 2026-05-20 16:54:59 +02:00 · cadad4a4ba
commit cadad4a4ba
parent 1caa283058
5 changed files with 7071 additions and 4344 deletions
--- a/.lintr
+++ b/.lintr
@ -1,5 +1,6 @@
 linters: lintr::linters_with_defaults(
    object_name_linter = lintr::object_name_linter(
        styles = c("snake_case", "dotted.case")
-    )
+    ),
    trailing_whitespace_linter = NULL
  )
--- a/report.Rmd
+++ b/report.Rmd
--- a/report.html
+++ b/report.html
--- a/slides.Rmd
+++ b/slides.Rmd
@ -21,6 +21,7 @@ library(gt)
 library(ggsurvfit)
 library(gridExtra)
 library(survminer)
 library(broom)
 ```
@ -32,9 +33,9 @@ dat <- dat |>
  mutate(
    sex = factor(sex, levels = c(0,1), labels = c("Female","Male")),
    tx.type = factor(tx.type, levels = c(0,1), labels = c("Cadaveric","Living")),
-    hla.match = factor(hla.match),
+    # hla.match = factor(hla.match),
-    year = factor(year)
+    # year = factor(year)
-  )
+)
 ```
 ```{r}
@ -244,11 +245,11 @@ plot.tx.donor
 # Overall Kaplan-Meier Curve
 ```{r}
-km_all <- survfit(Surv(follow.up, death) ~ 1, data = dat)
+km.all <- survfit(Surv(follow.up, death) ~ 1, data = dat)
 ```
 ```{r, fig.width=7, fig.height=5}
-km_all |>
+km.all |>
  ggsurvfit(type = "survival") +
  add_confidence_interval() +
  scale_y_continuous(limits = c(0, 1)) +
@ -261,7 +262,74 @@ km_all |>
 ```
 ```{r}
-summary(km_all, times = c(0, 4, 8, 12))
+header_bg   <- "#A83262"  # burgundy/magenta from right panel
 header_text <- "white"
 body_bg     <- "#FFF4CC"  # light cream
 stripe_bg   <- "#F6E8C3"  # slightly darker cream
 text_color  <- "#333333"
 border_col  <- "#D8C8A5"
 ```
 ```{r}
 times <- seq(0, 12, 2)
 summary(km.all, times = times)[c("time", "n.risk", "surv")] |>
  as.data.frame() |>
  gt() |>
  tab_header(
    title = "Overall Kaplan-Meier Survival Summary"
  ) |>
  cols_label(
    time = "Follow-up Time",
    n.risk = "Number at Risk",
    surv = "Survival Probability"
  ) |>
  fmt_number(
    columns = surv,
    decimals = 3
  ) |>
  tab_options(
    table.font.size = px(15),
    heading.title.font.size = px(20),
    column_labels.font.weight = "bold"
  )
 ```
 ```{r}
 summary(km.all, times = times)[c("time", "n.risk", "surv")] |>
  as.data.frame() |>
  gt() |>
  tab_header(
    title = "Overall Kaplan-Meier Survival Summary"
  ) |>
  cols_label(
    time = "Follow-up Time",
    n.risk = "Number at Risk",
    surv = "Survival Probability"
  ) |>
  fmt_number(columns = surv, decimals = 3) |>
  fmt_number(columns = time, decimals = 0) |>
  tab_style(
    style = list(
      cell_fill(color = "#A83262"),
      cell_text(color = "white", weight = "bold")
    ),
    locations = cells_column_labels()
  ) |>
  tab_style(
    style = cell_fill(color = "#FFF4CC"),
    locations = cells_body()
  ) |>
  opt_row_striping(row_striping = TRUE) |>
  tab_options(
    row.striping.background_color = "#F6E8C3",
    table.border.top.color = "#A83262",
    table.border.bottom.color = "#A83262",
    column_labels.border.bottom.color = "#A83262",
    table.font.size = px(16),
    heading.title.font.size = px(18),
    table.background.color = "#FFF4CC"
  )
 ```
 # Cox Model (with intervals?)
@ -434,29 +502,229 @@ eme.age.rec.group$contrast |>
 # Identifying Predictors
-TODO: write a more generic function
+```{r}
 # From exercise 9
 Waldtest <- function(coxph.object, idx) {
  b <- coxph.object$coef[idx]
  Sig <- coxph.object$var[idx, idx]
  Wald <- b %*% solve(Sig) %*% b
  p <- 1 - pchisq(as.numeric(Wald), df = length(idx))
  return(data.frame(Wald = Wald, df = length(idx), p = p))
 }
 test.new.var <- function(old.vars, new.var, i=1) {
  new.surv <- reformulate(
    termlabels = c(old.vars, new.var),
    response = "Surv(follow.up, death)"
  )
  cox.fit <- coxph(new.surv, data = dat, method = "breslow")
  print(cox.fit)
  Waldtest(cox.fit, i)
 }
 ```
 ## Start with nothing
 ```{r, echo=TRUE}
 old.vars <- c()
 test.new.var(old.vars, "tx.type", 1)
 test.new.var(old.vars, "hla.match", 1)
 test.new.var(old.vars, "age.donor", 1)
 test.new.var(old.vars, "age.rec", 1)
 test.new.var(old.vars, "cold.isc", 1)
 test.new.var(old.vars, "sex", 1)
 test.new.var(old.vars, "year", 1)
 ```
 ## `tx.type` + ?
 ```{r, echo=TRUE}
 old.vars <- c("tx.type")
 # test.new.var(old.vars, "hla.match", 2:7)
 test.new.var(old.vars, "hla.match", 2)
 test.new.var(old.vars, "age.donor", 2)
 test.new.var(old.vars, "age.rec", 2)
 test.new.var(old.vars, "cold.isc", 2)
 test.new.var(old.vars, "sex", 2)
 test.new.var(old.vars, "year", 2)
 ```
 `age.rec` has the smallest $p$-value
 ## `tx.type` + `age.rec` + ?
 ```{r, echo=TRUE}
 old.vars <- c("tx.type", "age.rec")
 test.new.var(old.vars, "hla.match", 3)
 test.new.var(old.vars, "age.donor", 3)
 test.new.var(old.vars, "cold.isc", 3)
 test.new.var(old.vars, "sex", 3)
 test.new.var(old.vars, "year", 3)
 ```
 `hla.match` has the smallest $p$-value
 ## `tx.type` + `age.rec` + `hla.match` + ?
 ```{r, echo=TRUE}
 old.vars <- c("tx.type", "age.rec", "hla.match")
 test.new.var(old.vars, "age.donor", 4)
 test.new.var(old.vars, "cold.isc", 4)
 test.new.var(old.vars, "sex", 4)
 test.new.var(old.vars, "year", 4)
 ```
 `age.donor` has the smallest $p$-value
 ## `tx.type` + `age.rec` + `hla.match` + `age.donor` + ? 
 ```{r, echo=TRUE}
 old.vars <- c("tx.type", "age.rec", "hla.match", "age.donor")
 test.new.var(old.vars, "cold.isc", 5)
 test.new.var(old.vars, "sex", 5)
 test.new.var(old.vars, "year", 5)
 ```
 `year` has the smallest $p$-value
 ## `tx.type` + `age.rec` + `hla.match` + `age.donor` + `year`
 ```{r, echo=TRUE}
 old.vars <- c("tx.type", "age.rec", "hla.match", "age.donor", "year")
 test.new.var(old.vars, "cold.isc", 6)
 test.new.var(old.vars, "sex", 6)
 ```
 Neither has $p$-value less than 0.05. Stop.
 # Full Model
 ```{r}
 surv.full <- Surv(follow.up + death) ~ tx.type + age.rec + hla.match +
                 age.donor + year
 cox.full <- coxph(surv.full, data = dat, method = "breslow")
 ```
 ```{r, echo=TRUE}
 cox.full
 ```
 ```{r, fig.width=7, fig.height=5}
 tidy(cox.full, exponentiate = TRUE, conf.int = TRUE) |>
  mutate(
    term = recode(
      term,
      "tx.typeLiving" = "Living donor",
      "age.rec" = "Recipient age",
      "hla.match" = "HLA match",
      "age.donor" = "Donor age",
      "year" = "Transplant year"
    )
  ) |>
  ggplot(aes(x = estimate, y = reorder(term, estimate))) +
  geom_vline(xintercept = 1, linetype = "dashed") +
  geom_errorbarh(
    aes(xmin = conf.low, xmax = conf.high),
    height = 0.2
  ) +
  scale_x_log10() +
  geom_point(size = 2) +
  labs(
    title = "Hazard Ratios from Full Cox Model",
    x = "Hazard Ratio",
    y = NULL
  ) +
  my.theme
 ```
 ## What happened to `year`?
 ```{r}
 ggplot(data = dat) +
  geom_boxplot(aes(x = factor(year), y = follow.up))
 ```
 ```{r}
 ggplot(data = dat) +
  geom_boxplot(aes(x = factor(year), y = follow.up)) +
  facet_grid(death ~ .)
 ```
 Probabily want to omit it
 ## Without `year`
 ```{r}
 surv.full.1 <- Surv(follow.up + death) ~ tx.type + age.rec + hla.match +
                 age.donor
 cox.full.1 <- coxph(surv.full.1, data = dat, method = "breslow")
 ```
 ```{r, echo=TRUE}
 cox.full.1
 ```
 ```{r, fig.width=7, fig.height=5}
 tidy(cox.full.1, exponentiate = TRUE, conf.int = TRUE) |>
  mutate(
    term = recode(
      term,
      "tx.typeLiving" = "Living donor",
      "age.rec" = "Recipient age",
      "hla.match" = "HLA match",
      "age.donor" = "Donor age",
    )
  ) |>
  ggplot(aes(x = estimate, y = reorder(term, estimate))) +
  geom_vline(xintercept = 1, linetype = "dashed") +
  geom_errorbarh(
    aes(xmin = conf.low, xmax = conf.high),
    height = 0.2
  ) +
  scale_x_log10() +
  geom_point(size = 2) +
  labs(
    title = "Hazard Ratios from Full Cox Model (Without Year)",
    x = "Hazard Ratio",
    y = NULL
  ) +
  my.theme
 ```
 ---
 TODO: write a more generic function (UPDATE: FAILED)
 <details>
 ```{r, eval=FALSE}
 # WARNING: BAD, DO NOT USE.
 vars <- c("hla.match", "age.donor", "age.rec", "cold.isc", "year", "sex", 
          "tx.type")
 included <- vars == "tx.type"
 Waldtest <- function(cox.fit, i.betas){
  q <- length(i.betas)
  coefs <- cox.fit$coefficients
  var <- cox.fit$var[i.betas, i.betas]
  Wald <- coefs[i.betas]%*%solve(var)%*%coefs[i.betas]
  p.value <- 1-pchisq(as.numeric(Wald), df = q)
  print(p.value)
  return(p.value)
 }
 ```
 ```{r}
 vars <- c("hla.match", "age.donor", "age.rec", "cold.isc", "year", "sex", 
          "tx.type")
 included <- vars == "tx.type"
 included <- c(FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, TRUE)
 included <- c(FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, TRUE)
-find.best.surv <- function(included) {
+find.model.vars <- function(included) {
  print('---')
  test.vars <- vars[!included]
  p.values <- sapply(test.vars, function(test.var) {
    new.surv <- reformulate(
@ -464,31 +732,24 @@ find.best.surv <- function(included) {
      response = "Surv(follow.up, death)"
    )
    fit.tx <- coxph(new.surv, data = dat)
-    Waldtest(fit.tx, sum(included))
+    Waldtest(fit.tx, 1)
  })
-  if (sum(p.values > 0.05) == sum(included)) {
+  if (all(p.values > 0.05)) {
    print(3)
    return(included) 
  }
  if (sum(included == TRUE) == length(included)) {
    print(2)
    return(included)
  }
-  print(1)
+  new.var <- names(p.values)[which.min(p.values)]
-  print(which.min(p.values))
+  included[which(vars == new.var)] <- TRUE
-  print(included)
+  return(find.model.vars(included))
  print(p.values)
  print(which.min(p.values))
  included[which.min(p.values)] <- TRUE
  print(included)
  # find.best.surv(included)
 }
-
+included <- find.model.vars(included)
-find.best.surv(included)
+vars[included]
 ```
 </details>
 ```{r}
 # surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match
 # fit.cox.model(surv, 3)
--- a/slides.html
+++ b/slides.html