library(tidyverse)
-library(dplyr)
-library(ggplot2)
-library(survival)
-library(emmeans)
-library(foreign)
-library(gtsummary)foo
-foo
dat <- read.csv("./unos.txt", sep = "\t")
-head(dat) hlamat age age.1 cold_isc death year sex txtype fu
-1 2 25 15 6 0 2002 0 1 0
-2 1 42 10 11 1 1999 0 1 0
-3 1 9 14 8 0 2002 1 1 0
-4 0 35 12 26 0 2002 1 1 0
-5 2 17 12 28 1 1997 1 1 0
-6 3 44 13 1 0 2002 1 0 0
-names(dat) <- c("hla.match", "age.donor", "age.rec", "cold.isc", "death",
- "year", "sex", "tx.type", "follow.up")library(tidyverse)
+## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
+## ✔ dplyr 1.1.4 ✔ readr 2.1.5
+## ✔ forcats 1.0.0 ✔ stringr 1.5.1
+## ✔ ggplot2 3.5.2 ✔ tibble 3.3.0
+## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
+## ✔ purrr 1.1.0
+## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
+## ✖ dplyr::filter() masks stats::filter()
+## ✖ dplyr::lag() masks stats::lag()
+## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
+library(dplyr)
+library(ggplot2)
+library(survival)
+library(emmeans)
+## Welcome to emmeans.
+## Caution: You lose important information if you filter this package's results.
+## See '? untidy'
+library(foreign)
+library(gtsummary)
+library(ggsurvfit)
+dat <- read.csv("./unos.txt", sep = "\t")
+head(dat)
+## hlamat age age.1 cold_isc death year sex txtype fu
+## 1 2 25 15 6 0 2002 0 1 0
+## 2 1 42 10 11 1 1999 0 1 0
+## 3 1 9 14 8 0 2002 1 1 0
+## 4 0 35 12 26 0 2002 1 1 0
+## 5 2 17 12 28 1 1997 1 1 0
+## 6 3 44 13 1 0 2002 1 0 0
+names(dat) <- c("hla.match", "age.donor", "age.rec", "cold.isc", "death",
+ "year", "sex", "tx.type", "follow.up")
Complete case
-nrow(dat)[1] 9775
-sapply(names(dat), function(col) {
- is.na(dat[, col]) |> sum()
-})hla.match age.donor age.rec cold.isc death year sex tx.type
- 234 113 9 2250 0 0 0 0
-follow.up
- 0
-1 Exercise
-1.1 Exercise 1
--Illustrate in a table the characteristics of the population (age, sex, race, donor, . . . ).
++nrow(dat)+## [1] 9775+sapply(names(dat), function(col) { + is.na(dat[, col]) |> sum() +})+## hla.match age.donor age.rec cold.isc death year sex tx.type +## 234 113 9 2250 0 0 0 0 +## follow.up +## 0+Exercise
++Exercise 1
++-Illustrate in a table the characteristics of the population (age, +sex, race, donor, . . . ).
-+-hist(dat$age.donor)----- --
-hist(dat$age.rec)----- --
-hist(dat$cold.isc)----- --
-hist(dat$year)----- --
-hist(dat$follow.up)----- --
-unique(dat$hla.match)---[1] 2 1 0 3 4 NA 5 6-unique(dat$year)---[1] 2002 1999 1997 2001 1994 1995 1996 1993 1990 1991 1992 2000 1998+hist(dat$age.donor)+
+hist(dat$age.rec)+
+hist(dat$cold.isc)+
+hist(dat$year)+
+hist(dat$follow.up)+
+unique(dat$hla.match)+## [1] 2 1 0 3 4 NA 5 6+unique(dat$year)## [1] 2002 1999 1997 2001 1994 1995 1996 1993 1990 1991 1992 2000 1998Change the order!!
----dat.table1 <- dat |> - mutate( - sex = factor(sex, levels = c(0,1), labels = c("Female","Male")), - tx.type = factor(tx.type, levels = c(0,1), labels = c("Cadaveric","Living")), - hla.match = factor(hla.match), - year = factor(year) - )--table1 <- dat.table1 |> - select(hla.match, age.donor, age.rec, cold.isc, year, sex, tx.type) |> - tbl_summary( - statistic = list( - all_continuous() ~ "{median} ({p25}, {p75})" - ), - label = list( - hla.match ~ "HLA matches, n(%)", - age.donor ~ "Donor age, median (IQR)", - age.rec ~ "Recipient age, median (IQR)", - cold.isc ~ "Cold ischemic time (hours), median (IQR), ", - year ~ "Year of transplant", - sex ~ "Sex, n(%)", - tx.type ~ "Transplant type, n(%)" - ), - missing = "ifany" - ) |> - modify_footnote(all_stat_cols() ~ NA) - -table1-- - --
- - -- - - - - -Characteristic -N = 9,775 -- -HLA matches, n(%) --
-- -0 -784 (8.2%) -- -1 -1,504 (16%) -- -2 -1,553 (16%) -- -3 -3,778 (40%) -- -4 -1,278 (13%) -- -5 -365 (3.8%) -- -6 -279 (2.9%) -- -Unknown -234 -- -Donor age, median (IQR) -33 (21, 41) -- -Unknown -113 -- -Recipient age, median (IQR) -13 (8, 16) -- -Unknown -9 -- -Cold ischemic time (hours), median (IQR), -7 (1, 19) -- -Unknown -2,250 -- -Year of transplant --
-- -1990 -728 (7.4%) -- -1991 -734 (7.5%) -- -1992 -666 (6.8%) -- -1993 -736 (7.5%) -- -1994 -787 (8.1%) -- -1995 -813 (8.3%) -- -1996 -789 (8.1%) -- -1997 -745 (7.6%) -- -1998 -740 (7.6%) -- -1999 -842 (8.6%) -- -2000 -706 (7.2%) -- -2001 -834 (8.5%) -- -2002 -655 (6.7%) -- -Sex, n(%) --
-- -Female -4,014 (41%) -- -Male -5,761 (59%) -- -Transplant type, n(%) --
-- -Cadaveric -5,148 (53%) -- - +Living -4,627 (47%) -+ +
- - - -+ + + +Characteristic +N = 9,775 ++ HLA matches, n(%) ++ 0 +784 (8.2%) + 1 +1,504 (16%) + 2 +1,553 (16%) + 3 +3,778 (40%) + 4 +1,278 (13%) + 5 +365 (3.8%) + 6 +279 (2.9%) + Unknown +234 + Donor age, median (IQR) +33 (21, 41) + Unknown +113 + Recipient age, median (IQR) +13 (8, 16) + Unknown +9 + Cold ischemic time (hours), median (IQR), +7 (1, 19) + Unknown +2,250 + Year of transplant ++ 1990 +728 (7.4%) + 1991 +734 (7.5%) + 1992 +666 (6.8%) + 1993 +736 (7.5%) + 1994 +787 (8.1%) + 1995 +813 (8.3%) + 1996 +789 (8.1%) + 1997 +745 (7.6%) + 1998 +740 (7.6%) + 1999 +842 (8.6%) + 2000 +706 (7.2%) + 2001 +834 (8.5%) + 2002 +655 (6.7%) + Sex, n(%) ++ Female +4,014 (41%) + Male +5,761 (59%) + Transplant type, n(%) ++ Cadaveric +5,148 (53%) + + + Living +4,627 (47%) --table.split <- dat.table1 |> - select(hla.match, age.donor, age.rec, cold.isc, year, sex, tx.type) |> - tbl_summary( - by = tx.type, - statistic = list( - all_continuous() ~ "{median} ({p25}, {p75})" - ), - label = list( - hla.match ~ "HLA matches, n(%)", - age.donor ~ "Donor age, median (IQR)", - age.rec ~ "Recipient age, median (IQR)", - cold.isc ~ "Cold ischemic time (hours), median (IQR), ", - year ~ "Year of transplant", - sex ~ "Sex, n(%)" - ), - missing = "ifany" - ) |> - add_overall() |> - modify_footnote(all_stat_cols() ~ NA) - -table.split-- - --
- - -- - - - - - - -Characteristic -Overall -
-N = 9,775Cadaveric -
-N = 5,148Living -
-N = 4,627- -HLA matches, n(%) --
--
--
-- -0 -784 (8.2%) -125 (2.5%) -659 (14%) -- -1 -1,504 (16%) -160 (3.2%) -1,344 (29%) -- -2 -1,553 (16%) -228 (4.6%) -1,325 (29%) -- -3 -3,778 (40%) -3,039 (61%) -739 (16%) -- -4 -1,278 (13%) -1,007 (20%) -271 (5.9%) -- -5 -365 (3.8%) -241 (4.9%) -124 (2.7%) -- -6 -279 (2.9%) -153 (3.1%) -126 (2.7%) -- -Unknown -234 -195 -39 -- -Donor age, median (IQR) -33 (21, 41) -37 (31, 42) -22 (15, 37) -- -Unknown -113 -112 -1 -- -Recipient age, median (IQR) -13 (8, 16) -13 (7, 16) -14 (9, 16) -- -Unknown -9 -7 -2 -- -Cold ischemic time (hours), median (IQR), -7 (1, 19) -1 (0, 1) -19 (13, 25) -- -Unknown -2,250 -1,490 -760 -- -Year of transplant --
--
--
-- -1990 -728 (7.4%) -318 (6.2%) -410 (8.9%) -- -1991 -734 (7.5%) -387 (7.5%) -347 (7.5%) -- -1992 -666 (6.8%) -345 (6.7%) -321 (6.9%) -- -1993 -736 (7.5%) -403 (7.8%) -333 (7.2%) -- -1994 -787 (8.1%) -355 (6.9%) -432 (9.3%) -- -1995 -813 (8.3%) -427 (8.3%) -386 (8.3%) -- -1996 -789 (8.1%) -416 (8.1%) -373 (8.1%) -- -1997 -745 (7.6%) -408 (7.9%) -337 (7.3%) -- -1998 -740 (7.6%) -412 (8.0%) -328 (7.1%) -- -1999 -842 (8.6%) -436 (8.5%) -406 (8.8%) -- -2000 -706 (7.2%) -392 (7.6%) -314 (6.8%) -- -2001 -834 (8.5%) -483 (9.4%) -351 (7.6%) -- -2002 -655 (6.7%) -366 (7.1%) -289 (6.2%) -- -Sex, n(%) --
--
--
-- -Female -4,014 (41%) -2,105 (41%) -1,909 (41%) -- - +Male -5,761 (59%) -3,043 (59%) -2,718 (59%) -+ +
+ ++ + + +Characteristic +Overall +
+N = 9,775Cadaveric +
+N = 5,148Living +
+N = 4,627+ HLA matches, n(%) ++ + + 0 +784 (8.2%) +125 (2.5%) +659 (14%) + 1 +1,504 (16%) +160 (3.2%) +1,344 (29%) + 2 +1,553 (16%) +228 (4.6%) +1,325 (29%) + 3 +3,778 (40%) +3,039 (61%) +739 (16%) + 4 +1,278 (13%) +1,007 (20%) +271 (5.9%) + 5 +365 (3.8%) +241 (4.9%) +124 (2.7%) + 6 +279 (2.9%) +153 (3.1%) +126 (2.7%) + Unknown +234 +195 +39 + Donor age, median (IQR) +33 (21, 41) +37 (31, 42) +22 (15, 37) + Unknown +113 +112 +1 + Recipient age, median (IQR) +13 (8, 16) +13 (7, 16) +14 (9, 16) + Unknown +9 +7 +2 + Cold ischemic time (hours), median (IQR), +7 (1, 19) +1 (0, 1) +19 (13, 25) + Unknown +2,250 +1,490 +760 + Year of transplant ++ + + 1990 +728 (7.4%) +318 (6.2%) +410 (8.9%) + 1991 +734 (7.5%) +387 (7.5%) +347 (7.5%) + 1992 +666 (6.8%) +345 (6.7%) +321 (6.9%) + 1993 +736 (7.5%) +403 (7.8%) +333 (7.2%) + 1994 +787 (8.1%) +355 (6.9%) +432 (9.3%) + 1995 +813 (8.3%) +427 (8.3%) +386 (8.3%) + 1996 +789 (8.1%) +416 (8.1%) +373 (8.1%) + 1997 +745 (7.6%) +408 (7.9%) +337 (7.3%) + 1998 +740 (7.6%) +412 (8.0%) +328 (7.1%) + 1999 +842 (8.6%) +436 (8.5%) +406 (8.8%) + 2000 +706 (7.2%) +392 (7.6%) +314 (6.8%) + 2001 +834 (8.5%) +483 (9.4%) +351 (7.6%) + 2002 +655 (6.7%) +366 (7.1%) +289 (6.2%) + Sex, n(%) ++ + + Female +4,014 (41%) +2,105 (41%) +1,909 (41%) + + + Male +5,761 (59%) +3,043 (59%) +2,718 (59%) +table.split <- dat.table1 |> + select(sex, age.rec, age.donor, hla.match, cold.isc, tx.type) |> + tbl_summary( + by = tx.type, + statistic = list( + all_continuous() ~ "{median} ({p25}, {p75})" + ), + label = list( + hla.match ~ "HLA matches, n(%)", + age.donor ~ "Donor age, median (IQR)", + age.rec ~ "Recipient age, median (IQR)", + cold.isc ~ "Cold ischemic time (hours), median (IQR), ", + sex ~ "Sex, n(%)" + ), + missing = "ifany" + ) |> + add_overall() |> + modify_footnote(all_stat_cols() ~ NA) +table.split+ +++ +
+ + + +Characteristic +Overall +
+N = 9,775Cadaveric +
+N = 5,148Living +
+N = 4,627+ Sex, n(%) ++ + + Female +4,014 (41%) +2,105 (41%) +1,909 (41%) + Male +5,761 (59%) +3,043 (59%) +2,718 (59%) + Recipient age, median (IQR) +13 (8, 16) +13 (7, 16) +14 (9, 16) + Unknown +9 +7 +2 + Donor age, median (IQR) +33 (21, 41) +37 (31, 42) +22 (15, 37) + Unknown +113 +112 +1 + HLA matches, n(%) ++ + + 0 +784 (8.2%) +125 (2.5%) +659 (14%) + 1 +1,504 (16%) +160 (3.2%) +1,344 (29%) + 2 +1,553 (16%) +228 (4.6%) +1,325 (29%) + 3 +3,778 (40%) +3,039 (61%) +739 (16%) + 4 +1,278 (13%) +1,007 (20%) +271 (5.9%) + 5 +365 (3.8%) +241 (4.9%) +124 (2.7%) + 6 +279 (2.9%) +153 (3.1%) +126 (2.7%) + Unknown +234 +195 +39 + Cold ischemic time (hours), median (IQR), +7 (1, 19) +1 (0, 1) +19 (13, 25) + + + + Unknown +2,250 +1,490 +760 Calculate median follow-up (reverse Kaplan-Meier method), with 95% +CI
++rev_km <- survfit(Surv(follow.up, death == 0) ~ 1, data = dat) + +summary(rev_km)$table+## records n.max n.start events rmean se(rmean) +## 9.775000e+03 9.775000e+03 9.775000e+03 9.310000e+03 4.055027e+00 3.202711e-02 +## median 0.95LCL 0.95UCL +## 3.380822e+00 3.227397e+00 3.567123e+00+median_followup <- summary(rev_km)$table["median"] +confidence_interval <- c(summary(rev_km)$table["0.95LCL"], summary(rev_km)$table["0.95UCL"]) +median_followup+## median +## 3.380822+confidence_interval+## 0.95LCL 0.95UCL +## 3.227397 3.567123+g <- ggplot(dat)+g + geom_point(aes(x = follow.up, y = death))+
+g + geom_density(aes(x = follow.up))+
+g + geom_density(aes(x = age.donor))+## Warning: Removed 113 rows containing non-finite outside the scale range +## (`stat_density()`).+
+g + geom_bar(aes(x = age.rec))+## Warning: Removed 9 rows containing non-finite outside the scale range +## (`stat_count()`).+
+g + geom_boxplot(aes(x = age.rec, y = age.donor, group = age.donor))+## Warning: Removed 113 rows containing missing values or values outside the scale range +## (`stat_boxplot()`).+## Warning: Removed 7 rows containing non-finite outside the scale range +## (`stat_boxplot()`).+
+# dat$age.1 |> table()+g + geom_boxplot(aes(x = year, y = follow.up, group = year))+
+-Exercise 2
+Plot the Kaplan-Meier overall survival curve for pediatric kid- ney +transplant recipients for the first 12 years after transplantation.
+!!!!!!!! just set xlim to 12, do not remove those individuals +!!!!!!!
++km <- survfit(Surv(follow.up, death) ~ 1, data = dat[dat$follow.up <= 12, ]) +plot(km)
Calculate median follow-up (reverse Kaplan-Meier method), with 95% CI
----rev_km <- survfit(Surv(follow.up, death == 0) ~ 1, data = dat) - -summary(rev_km)$table---records n.max n.start events rmean se(rmean) -9.775000e+03 9.775000e+03 9.775000e+03 9.310000e+03 4.055027e+00 3.202711e-02 - median 0.95LCL 0.95UCL -3.380822e+00 3.227397e+00 3.567123e+00-median_followup <- summary(rev_km)$table["median"] -confidence_interval <- c(summary(rev_km)$table["0.95LCL"], summary(rev_km)$table["0.95UCL"]) -median_followup---median -3.380822-confidence_interval---0.95LCL 0.95UCL -3.227397 3.567123---g <- ggplot(dat)---g + geom_point(aes(x = follow.up, y = death))----- --
-g + geom_density(aes(x = follow.up))----- --
-g + geom_density(aes(x = age.donor))---Warning: Removed 113 rows containing non-finite outside the scale range -(`stat_density()`).----- --
-g + geom_bar(aes(x = age.rec))---Warning: Removed 9 rows containing non-finite outside the scale range -(`stat_count()`).----- --
-g + geom_boxplot(aes(x = age.rec, y = age.donor, group = age.donor))---Warning: Removed 113 rows containing missing values or values outside the scale range -(`stat_boxplot()`).---Warning: Removed 7 rows containing non-finite outside the scale range -(`stat_boxplot()`).----- --
-# dat$age.1 |> table()-- --g + geom_boxplot(aes(x = year, y = follow.up, group = year))----- --
- -1.2 Exercise 2
-Plot the Kaplan-Meier overall survival curve for pediatric kid- ney transplant recipients for the first 12 years after transplantation.
-!!!!!!!! just set xlim to 12, do not remove those individuals !!!!!!!
----km <- survfit(Surv(follow.up, death) ~ 1, data = dat[dat$follow.up <= 12, ]) -plot(km)----- --
- 1.3 Exercise 3
-We are going to compare mortality rates (hazard functions) between children whose transplanted kidney was provided by a living donor (in general a family member) and those whose source was recently deceased (variable donor type:
-txtype). Use the life table method to calculate the death rates for the first 5 years for each group (take in the first year intervals of 4 months and then look at each year) and show the results in a table. Estimate the hazard ratio in each time interval as the ratio between the mortality rates in the two groups. What do you notice?---dat.5 <- dat[dat$follow.up <= 5, ] -head(dat.5)---hla.match age.donor age.rec cold.isc death year sex tx.type follow.up -1 2 25 15 6 0 2002 0 1 0 -2 1 42 10 11 1 1999 0 1 0 -3 1 9 14 8 0 2002 1 1 0 -4 0 35 12 26 0 2002 1 1 0 -5 2 17 12 28 1 1997 1 1 0 -6 3 44 13 1 0 2002 1 0 0-classify_time_interval = function(fu) { - if (fu <= 1/3) { - return(1/3) - } else if (fu <= 2/3) { - return(2/3) - } else if (fu <= 1) { - return(1) - } - ceiling(fu) -} - -dat.5$fu.interval <- sapply(dat.5$follow.up, classify_time_interval) -table(dat.5$fu.interval)----0.333333333333333 0.666666666666667 1 2 - 982 528 640 1226 - 3 4 5 - 1184 1093 950-head(dat.5)---hla.match age.donor age.rec cold.isc death year sex tx.type follow.up -1 2 25 15 6 0 2002 0 1 0 -2 1 42 10 11 1 1999 0 1 0 -3 1 9 14 8 0 2002 1 1 0 -4 0 35 12 26 0 2002 1 1 0 -5 2 17 12 28 1 1997 1 1 0 -6 3 44 13 1 0 2002 1 0 0 - fu.interval -1 0.3333333 -2 0.3333333 -3 0.3333333 -4 0.3333333 -5 0.3333333 -6 0.3333333-+-dat.5.life <- dat.5 |> - group_by(fu.interval) |> - summarize( - n.censored = sum(death == 0), - n.event = sum(death), - n.at.risk = nrow(dat), - ) - -for (i in 2:nrow(dat.5.life)) { - j <- i - 1 - - n.censored.pre <- dat.5.life$n.censored[j] - n.event.pre <- dat.5.life$n.event[j] - n.at.risk.pre <- dat.5.life$n.at.risk[j] - - n.at.risk <- n.at.risk.pre - n.event.pre - n.censored.pre - - dat.5.life$n.at.risk[i] <- n.at.risk -} - -print(nrow(dat))---[1] 9775-dat.5.life---# A tibble: 7 × 4 - fu.interval n.censored n.event n.at.risk - <dbl> <int> <int> <int> -1 0.333 830 152 9775 -2 0.667 491 37 8793 -3 1 613 27 8265 -4 2 1164 62 7625 -5 3 1143 41 6399 -6 4 1053 40 5215 -7 5 919 31 4122+Exercise 3
+We are going to compare mortality rates (hazard functions) between +children whose transplanted kidney was provided by a living donor (in +general a family member) and those whose source was recently deceased +(variable donor type:
+txtype). Use the life table method to +calculate the death rates for the first 5 years for each group (take in +the first year intervals of 4 months and then look at each year) and +show the results in a table. Estimate the hazard ratio in each time +interval as the ratio between the mortality rates in the two groups. +What do you notice?+dat.5 <- dat[dat$follow.up <= 5, ] +head(dat.5)+## hla.match age.donor age.rec cold.isc death year sex tx.type follow.up +## 1 2 25 15 6 0 2002 0 1 0 +## 2 1 42 10 11 1 1999 0 1 0 +## 3 1 9 14 8 0 2002 1 1 0 +## 4 0 35 12 26 0 2002 1 1 0 +## 5 2 17 12 28 1 1997 1 1 0 +## 6 3 44 13 1 0 2002 1 0 0+classify_time_interval = function(fu) { + if (fu <= 1/3) { + return(1/3) + } else if (fu <= 2/3) { + return(2/3) + } else if (fu <= 1) { + return(1) + } + ceiling(fu) +} + +dat.5$fu.interval <- sapply(dat.5$follow.up, classify_time_interval) +table(dat.5$fu.interval)+## +## 0.333333333333333 0.666666666666667 1 2 +## 982 528 640 1226 +## 3 4 5 +## 1184 1093 950+head(dat.5)+## hla.match age.donor age.rec cold.isc death year sex tx.type follow.up +## 1 2 25 15 6 0 2002 0 1 0 +## 2 1 42 10 11 1 1999 0 1 0 +## 3 1 9 14 8 0 2002 1 1 0 +## 4 0 35 12 26 0 2002 1 1 0 +## 5 2 17 12 28 1 1997 1 1 0 +## 6 3 44 13 1 0 2002 1 0 0 +## fu.interval +## 1 0.3333333 +## 2 0.3333333 +## 3 0.3333333 +## 4 0.3333333 +## 5 0.3333333 +## 6 0.3333333+dat.5.life <- dat.5 |> + group_by(fu.interval) |> + summarize( + n.censored = sum(death == 0), + n.event = sum(death), + n.at.risk = nrow(dat), + ) + +for (i in 2:nrow(dat.5.life)) { + j <- i - 1 + + n.censored.pre <- dat.5.life$n.censored[j] + n.event.pre <- dat.5.life$n.event[j] + n.at.risk.pre <- dat.5.life$n.at.risk[j] + + n.at.risk <- n.at.risk.pre - n.event.pre - n.censored.pre + + dat.5.life$n.at.risk[i] <- n.at.risk +} + +print(nrow(dat))+## [1] 9775+dat.5.life## # A tibble: 7 × 4 +## fu.interval n.censored n.event n.at.risk +## <dbl> <int> <int> <int> +## 1 0.333 830 152 9775 +## 2 0.667 491 37 8793 +## 3 1 613 27 8265 +## 4 2 1164 62 7625 +## 5 3 1143 41 6399 +## 6 4 1053 40 5215 +## 7 5 919 31 4122n.removed.acc
----dat.5.life <- dat.5.life |> - mutate( - hazard.rate = n.event / n.at.risk - )
----get_life_table = function(dat.sub, dat) { - dat.sub <- dat.sub |> - group_by(fu.interval) |> - summarize( - n.censored = sum(death == 0), - n.event = sum(death), - n.at.risk = nrow(dat), - ) - - for (i in 2:nrow(dat.sub)) { - j <- i - 1 - - n.censored.pre <- dat.sub$n.censored[j] - n.event.pre <- dat.sub$n.event[j] - n.at.risk.pre <- dat.sub$n.at.risk[j] - - n.at.risk <- n.at.risk.pre - n.event.pre - n.censored.pre - - dat.sub$n.at.risk[i] <- n.at.risk - } - - dat.sub <- dat.sub |> - mutate( - hazard.rate = n.event / n.at.risk - ) - - return(dat.sub) -}---dat.5.tx0 = dat.5[dat.5$tx.type == 0, ] -dat.5.tx1 = dat.5[dat.5$tx.type == 1, ]---tx0.life <- get_life_table(dat.5.tx0, dat[dat$tx.type == 0, ]) -tx0.life---# A tibble: 7 × 5 - fu.interval n.censored n.event n.at.risk hazard.rate - <dbl> <int> <int> <int> <dbl> -1 0.333 463 49 5148 0.00952 -2 0.667 253 13 4636 0.00280 -3 1 340 11 4370 0.00252 -4 2 591 24 4019 0.00597 -5 3 568 17 3404 0.00499 -6 4 521 16 2819 0.00568 -7 5 501 11 2282 0.00482---tx1.life <- get_life_table(dat.5.tx1, dat[dat$tx.type == 1, ]) -tx1.life---# A tibble: 7 × 5 - fu.interval n.censored n.event n.at.risk hazard.rate - <dbl> <int> <int> <int> <dbl> -1 0.333 367 103 4627 0.0223 -2 0.667 238 24 4157 0.00577 -3 1 273 16 3895 0.00411 -4 2 573 38 3606 0.0105 -5 3 575 24 2995 0.00801 -6 4 532 24 2396 0.0100 -7 5 418 20 1840 0.0109---tx1.life$hazard.rate / tx0.life$hazard.rate---[1] 2.338731 2.058881 1.631929 1.764675 1.604557 1.764816 2.254941---hazard.df <- data.frame( - fu.interval = tx1.life$fu.interval, - hazard.rate.0 = tx0.life$hazard.rate, - hazard.rate.1 = tx1.life$hazard.rate, - hazard.ratio = tx1.life$hazard.rate / tx0.life$hazard.rate -) - -ggplot(hazard.df, aes(x = fu.interval)) + - geom_line(aes(y = hazard.rate.0), color = "blue") + - geom_line(aes(y = hazard.rate.1), color = "orange")----- --
-ggplot(hazard.df, aes(x = fu.interval)) + - geom_line(aes(y = hazard.ratio))----- --
++dat.5.life <- dat.5.life |> + mutate( + hazard.rate = n.event / n.at.risk + )
++get_life_table = function(dat.sub, dat) { + dat.sub <- dat.sub |> + group_by(fu.interval) |> + summarize( + n.censored = sum(death == 0), + n.event = sum(death), + n.at.risk = nrow(dat), + ) + + for (i in 2:nrow(dat.sub)) { + j <- i - 1 + + n.censored.pre <- dat.sub$n.censored[j] + n.event.pre <- dat.sub$n.event[j] + n.at.risk.pre <- dat.sub$n.at.risk[j] + + n.at.risk <- n.at.risk.pre - n.event.pre - n.censored.pre + + dat.sub$n.at.risk[i] <- n.at.risk + } + + dat.sub <- dat.sub |> + mutate( + hazard.rate = n.event / n.at.risk + ) + + return(dat.sub) +}+dat.5.tx0 = dat.5[dat.5$tx.type == 0, ] +dat.5.tx1 = dat.5[dat.5$tx.type == 1, ]+tx0.life <- get_life_table(dat.5.tx0, dat[dat$tx.type == 0, ]) +tx0.life+## # A tibble: 7 × 5 +## fu.interval n.censored n.event n.at.risk hazard.rate +## <dbl> <int> <int> <int> <dbl> +## 1 0.333 463 49 5148 0.00952 +## 2 0.667 253 13 4636 0.00280 +## 3 1 340 11 4370 0.00252 +## 4 2 591 24 4019 0.00597 +## 5 3 568 17 3404 0.00499 +## 6 4 521 16 2819 0.00568 +## 7 5 501 11 2282 0.00482+tx1.life <- get_life_table(dat.5.tx1, dat[dat$tx.type == 1, ]) +tx1.life+## # A tibble: 7 × 5 +## fu.interval n.censored n.event n.at.risk hazard.rate +## <dbl> <int> <int> <int> <dbl> +## 1 0.333 367 103 4627 0.0223 +## 2 0.667 238 24 4157 0.00577 +## 3 1 273 16 3895 0.00411 +## 4 2 573 38 3606 0.0105 +## 5 3 575 24 2995 0.00801 +## 6 4 532 24 2396 0.0100 +## 7 5 418 20 1840 0.0109+tx1.life$hazard.rate / tx0.life$hazard.rate+## [1] 2.338731 2.058881 1.631929 1.764675 1.604557 1.764816 2.254941+hazard.df <- data.frame( + fu.interval = tx1.life$fu.interval, + hazard.rate.0 = tx0.life$hazard.rate, + hazard.rate.1 = tx1.life$hazard.rate, + hazard.ratio = tx1.life$hazard.rate / tx0.life$hazard.rate +) + +ggplot(hazard.df, aes(x = fu.interval)) + + geom_line(aes(y = hazard.rate.0), color = "blue") + + geom_line(aes(y = hazard.rate.1), color = "orange")+
+ggplot(hazard.df, aes(x = fu.interval)) + + geom_line(aes(y = hazard.ratio))+
General Solution:
----get.life.table <- function(dat, time.intervals) { - n.pop <- nrow(dat) - - dat |> - recode.dat(time.intervals) |> - group_by(fu.interval) |> - summarize( - n.censored = sum(.data$death == 0), - n.event = sum(.data$death), - ) |> - ungroup() |> - calculate.hazard(n.pop) -} - -get.life.table.by.groups <- function(dat, time.intervals, grps) { - grps |> - lapply(function(grp) { - dat |> - get.life.table.by.group(time.intervals, grp) |> - mutate( - grp.name = grp, - grp.value = pick(1)[[1]] - ) |> - select(-1) - }) |> - bind_rows() -} - -get.life.table.by.group <- function(dat, time.intervals, grp) { - dat |> - recode.dat(time.intervals) |> - group_by(fu.interval, .data[[grp]]) |> - summarize( - n.censored = sum(.data$death == 0), - n.event = sum(.data$death), - .groups = "keep" - ) |> - ungroup(fu.interval) |> - group_modify(function(df.sub, grp) { - grp.name <- names(grp) - grp.value <- grp[[1]] - n.pop <- (dat[[grp.name]] == grp.value) |> sum() - calculate.hazard(df.sub, n.pop) - }) |> - ungroup() -} - -calculate.hazard <- function(life.table, n.pop) { - n.removed <- life.table$n.event + life.table$n.censored - n.removed.accum <- c(0, cumsum(n.removed)[-length(n.removed)]) - life.table |> - mutate( - n.at.risk = n.pop - n.removed.accum, - # TODO: how to account for censored? How do we adjust for uneven interval? - hazard.rate = n.event / n.at.risk - ) -} - -recode.dat <- function(dat, time.intervals) { - df <- dat[dat$follow.up <= sum(time.intervals), ] - time.points <- cumsum(time.intervals) - df$fu.interval <- sapply(df$follow.up, function(time) { - time.points[time <= time.points][1] - }) - - df -}---time.intervals <- c(1/3, 1/3, 1/3, 1, 1, 1, 1) -grps <- c("tx.type", "sex", "hla.match", "cold.isc") - -get.life.table.by.groups(dat, time.intervals, grps)---# A tibble: 593 × 7 - fu.interval n.censored n.event n.at.risk hazard.rate grp.name grp.value - <dbl> <int> <int> <dbl> <dbl> <chr> <dbl> - 1 0.333 463 49 5148 0.00952 tx.type 0 - 2 0.667 253 13 4636 0.00280 tx.type 0 - 3 1 340 11 4370 0.00252 tx.type 0 - 4 2 591 24 4019 0.00597 tx.type 0 - 5 3 568 17 3404 0.00499 tx.type 0 - 6 4 521 16 2819 0.00568 tx.type 0 - 7 5 501 11 2282 0.00482 tx.type 0 - 8 0.333 367 103 4627 0.0223 tx.type 1 - 9 0.667 238 24 4157 0.00577 tx.type 1 -10 1 273 16 3895 0.00411 tx.type 1 -# ℹ 583 more rows---grps <- c("tx.type", "sex", "hla.match", "cold.isc") -grps <- c("tx.type") - - -plot.hazard.rate.by.groups <- function(dat, time.intervals, grps) { - dat |> - get.life.table.by.groups(time.intervals, grps) |> - ggplot(aes(x = fu.interval, y = hazard.rate, - color = paste(grp.name, grp.value, sep = " = "))) + - geom_point() + - geom_line() + - labs( - color = "Group" - ) + - theme( - legend.position = "bottom" - ) -}---time.intervals <- c(rep(1/3, 3), rep(1, 4)) -plot.hazard.rate.by.groups(dat, time.intervals, c("tx.type"))----- --
---time.intervals <- rep(1, 5) -plot.hazard.rate.by.groups(dat, time.intervals, c("tx.type"))----- --
---time.intervals <- rep(1/3, 15) -plot.hazard.rate.by.groups(dat, time.intervals, c("tx.type"))----- --
---time.intervals <- c(rep(1/3, 3*3), rep(1, 2)) -plot.hazard.rate.by.groups(dat, time.intervals, c("tx.type"))----- --
---time.intervals <- c(rep(1/4, 4), rep(1, 4)) -plot.hazard.rate.by.groups(dat, time.intervals, c("tx.type"))----- --
---time.intervals <- c(rep(1/4, 4*2), rep(1, 3)) -plot.hazard.rate.by.groups(dat, time.intervals, c("tx.type"))----- --
---time.intervals <- c(rep(1/4, 4*2), rep(1/3, 3*3), rep(1, 6)) -plot.hazard.rate.by.groups(dat, time.intervals, c("tx.type"))----- --
++get.life.table <- function(dat, time.intervals) { + n.pop <- nrow(dat) + + dat |> + recode.dat(time.intervals) |> + group_by(fu.interval) |> + summarize( + n.censored = sum(.data$death == 0), + n.event = sum(.data$death), + ) |> + ungroup() |> + calculate.hazard(n.pop) +} + +get.life.table.by.groups <- function(dat, time.intervals, grps) { + grps |> + lapply(function(grp) { + dat |> + get.life.table.by.group(time.intervals, grp) |> + mutate( + grp.name = grp, + grp.value = pick(1)[[1]] + ) |> + select(-1) + }) |> + bind_rows() +} + +get.life.table.by.group <- function(dat, time.intervals, grp) { + dat |> + recode.dat(time.intervals) |> + group_by(fu.interval, .data[[grp]]) |> + summarize( + n.censored = sum(.data$death == 0), + n.event = sum(.data$death), + .groups = "keep" + ) |> + ungroup(fu.interval) |> + group_modify(function(df.sub, grp) { + grp.name <- names(grp) + grp.value <- grp[[1]] + n.pop <- (dat[[grp.name]] == grp.value) |> sum() + calculate.hazard(df.sub, n.pop) + }) |> + ungroup() +} + +calculate.hazard <- function(life.table, n.pop) { + n.removed <- life.table$n.event + life.table$n.censored + n.removed.accum <- c(0, cumsum(n.removed)[-length(n.removed)]) + life.table |> + mutate( + n.at.risk = n.pop - n.removed.accum, + # TODO: how to account for censored? How do we adjust for uneven interval? + hazard.rate = n.event / n.at.risk + ) +} + +recode.dat <- function(dat, time.intervals) { + df <- dat[dat$follow.up <= sum(time.intervals), ] + time.points <- cumsum(time.intervals) + df$fu.interval <- sapply(df$follow.up, function(time) { + time.points[time <= time.points][1] + }) + + df +}+time.intervals <- c(1/3, 1/3, 1/3, 1, 1, 1, 1) +grps <- c("tx.type", "sex", "hla.match", "cold.isc") + +get.life.table.by.groups(dat, time.intervals, grps)+## # A tibble: 593 × 7 +## fu.interval n.censored n.event n.at.risk hazard.rate grp.name grp.value +## <dbl> <int> <int> <dbl> <dbl> <chr> <dbl> +## 1 0.333 463 49 5148 0.00952 tx.type 0 +## 2 0.667 253 13 4636 0.00280 tx.type 0 +## 3 1 340 11 4370 0.00252 tx.type 0 +## 4 2 591 24 4019 0.00597 tx.type 0 +## 5 3 568 17 3404 0.00499 tx.type 0 +## 6 4 521 16 2819 0.00568 tx.type 0 +## 7 5 501 11 2282 0.00482 tx.type 0 +## 8 0.333 367 103 4627 0.0223 tx.type 1 +## 9 0.667 238 24 4157 0.00577 tx.type 1 +## 10 1 273 16 3895 0.00411 tx.type 1 +## # ℹ 583 more rows+grps <- c("tx.type", "sex", "hla.match", "cold.isc") +grps <- c("tx.type") + + +plot.hazard.rate.by.groups <- function(dat, time.intervals, grps) { + dat |> + get.life.table.by.groups(time.intervals, grps) |> + ggplot(aes(x = fu.interval, y = hazard.rate, + color = paste(grp.name, grp.value, sep = " = "))) + + geom_point() + + geom_line() + + labs( + color = "Group" + ) + + theme( + legend.position = "bottom" + ) +}+time.intervals <- c(rep(1/3, 3), rep(1, 4)) +plot.hazard.rate.by.groups(dat, time.intervals, c("tx.type"))+
+time.intervals <- rep(1, 5) +plot.hazard.rate.by.groups(dat, time.intervals, c("tx.type"))+
+time.intervals <- rep(1/3, 15) +plot.hazard.rate.by.groups(dat, time.intervals, c("tx.type"))+
+time.intervals <- c(rep(1/3, 3*3), rep(1, 2)) +plot.hazard.rate.by.groups(dat, time.intervals, c("tx.type"))+
+time.intervals <- c(rep(1/4, 4), rep(1, 4)) +plot.hazard.rate.by.groups(dat, time.intervals, c("tx.type"))+
+time.intervals <- c(rep(1/4, 4*2), rep(1, 3)) +plot.hazard.rate.by.groups(dat, time.intervals, c("tx.type"))+
+time.intervals <- c(rep(1/4, 4*2), rep(1/3, 3*3), rep(1, 6)) +plot.hazard.rate.by.groups(dat, time.intervals, c("tx.type"))+
Compare
----time.intervals <- c(1/3, 1/3, 1/3, 1, 1, 1, 1) -get.life.table.by.groups(dat, time.intervals, grps)--+# A tibble: 14 × 7 - fu.interval n.censored n.event n.at.risk hazard.rate grp.name grp.value - <dbl> <int> <int> <dbl> <dbl> <chr> <int> - 1 0.333 463 49 5148 0.00952 tx.type 0 - 2 0.667 253 13 4636 0.00280 tx.type 0 - 3 1 340 11 4370 0.00252 tx.type 0 - 4 2 591 24 4019 0.00597 tx.type 0 - 5 3 568 17 3404 0.00499 tx.type 0 - 6 4 521 16 2819 0.00568 tx.type 0 - 7 5 501 11 2282 0.00482 tx.type 0 - 8 0.333 367 103 4627 0.0223 tx.type 1 - 9 0.667 238 24 4157 0.00577 tx.type 1 -10 1 273 16 3895 0.00411 tx.type 1 -11 2 573 38 3606 0.0105 tx.type 1 -12 3 575 24 2995 0.00801 tx.type 1 -13 4 532 24 2396 0.0100 tx.type 1 -14 5 418 20 1840 0.0109 tx.type 1+time.intervals <- c(1/3, 1/3, 1/3, 1, 1, 1, 1) +get.life.table.by.groups(dat, time.intervals, grps)+## # A tibble: 14 × 7 +## fu.interval n.censored n.event n.at.risk hazard.rate grp.name grp.value +## <dbl> <int> <int> <dbl> <dbl> <chr> <int> +## 1 0.333 463 49 5148 0.00952 tx.type 0 +## 2 0.667 253 13 4636 0.00280 tx.type 0 +## 3 1 340 11 4370 0.00252 tx.type 0 +## 4 2 591 24 4019 0.00597 tx.type 0 +## 5 3 568 17 3404 0.00499 tx.type 0 +## 6 4 521 16 2819 0.00568 tx.type 0 +## 7 5 501 11 2282 0.00482 tx.type 0 +## 8 0.333 367 103 4627 0.0223 tx.type 1 +## 9 0.667 238 24 4157 0.00577 tx.type 1 +## 10 1 273 16 3895 0.00411 tx.type 1 +## 11 2 573 38 3606 0.0105 tx.type 1 +## 12 3 575 24 2995 0.00801 tx.type 1 +## 13 4 532 24 2396 0.0100 tx.type 1 +## 14 5 418 20 1840 0.0109 tx.type 1+hazard.df## fu.interval hazard.rate.0 hazard.rate.1 hazard.ratio +## 1 0.3333333 0.009518260 0.022260644 2.338731 +## 2 0.6666667 0.002804142 0.005773394 2.058881 +## 3 1.0000000 0.002517162 0.004107831 1.631929 +## 4 2.0000000 0.005971635 0.010537992 1.764675 +## 5 3.0000000 0.004994125 0.008013356 1.604557 +## 6 4.0000000 0.005675772 0.010016694 1.764816 +## 7 5.0000000 0.004820333 0.010869565 2.254941-- --hazard.df---fu.interval hazard.rate.0 hazard.rate.1 hazard.ratio -1 0.3333333 0.009518260 0.022260644 2.338731 -2 0.6666667 0.002804142 0.005773394 2.058881 -3 1.0000000 0.002517162 0.004107831 1.631929 -4 2.0000000 0.005971635 0.010537992 1.764675 -5 3.0000000 0.004994125 0.008013356 1.604557 -6 4.0000000 0.005675772 0.010016694 1.764816 -7 5.0000000 0.004820333 0.010869565 2.254941- -1.4 Exercise 4
-Show a plot with Kaplan-Meier survival curves for the two donor types.
----km.tx <- survfit(Surv(follow.up, death) ~ tx.type, data = dat[dat$follow.up <= 12, ]) - -plot(km.tx, col = c("blue", "orange")) -legend(legend = c("cadaveric", "living"), "bottomleft", lwd = 2, col = c("blue", "orange"))----- --
- 1.5 Exercise 5
-Fit a univariate Cox model with predictor donor type. Report the hazard ratio and 95% confidence interval and interpret the result obtained.
---cox.tx <- coxph(Surv(follow.up, death) ~ tx.type, data = dat) -summary(cox.tx)-+Call: -coxph(formula = Surv(follow.up, death) ~ tx.type, data = dat) +++Exercise 4
+Show a plot with Kaplan-Meier survival curves for the two donor +types.
++km.tx <- survfit(Surv(follow.up, death) ~ tx.type, data = dat[dat$follow.up <= 12, ]) - n= 9775, number of events= 465 +plot(km.tx, col = c("blue", "orange")) +legend(legend = c("cadaveric", "living"), "bottomleft", lwd = 2, col = c("blue", "orange"))+
++Exercise 5
+Fit a univariate Cox model with predictor donor type. Report the +hazard ratio and 95% confidence interval and interpret the result +obtained.
++cox.tx <- coxph(Surv(follow.up, death) ~ tx.type, data = dat) +summary(cox.tx)+## Call: +## coxph(formula = Surv(follow.up, death) ~ tx.type, data = dat) +## +## n= 9775, number of events= 465 +## +## coef exp(coef) se(coef) z Pr(>|z|) +## tx.type 0.64469 1.90539 0.09558 6.745 1.53e-11 *** +## --- +## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 +## +## exp(coef) exp(-coef) lower .95 upper .95 +## tx.type 1.905 0.5248 1.58 2.298 +## +## Concordance= 0.586 (se = 0.012 ) +## Likelihood ratio test= 47.1 on 1 df, p=7e-12 +## Wald test = 45.5 on 1 df, p=2e-11 +## Score (logrank) test = 47.09 on 1 df, p=7e-12+log.hazard.ratio.hat <- cox.tx$coefficient +hazard.ratio.hat <- log.hazard.ratio.hat |> exp()+log.hazard.ratio.se <- summary(cox.tx)$coefficients[3] +log.hazard.ratio.hat.lower.bound <- log.hazard.ratio.hat - qnorm(0.975) * log.hazard.ratio.se +log.hazard.ratio.hat.upper.bound <- log.hazard.ratio.hat + qnorm(0.975) * log.hazard.ratio.se - coef exp(coef) se(coef) z Pr(>|z|) -tx.type 0.64469 1.90539 0.09558 6.745 1.53e-11 *** ---- -Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 +c(log.hazard.ratio.hat.lower.bound, log.hazard.ratio.hat.upper.bound) |> exp()+## tx.type tx.type +## 1.579899 2.297932++Patients receiving kidney transplants from living donors +(
+tx.type = 1) has 1.91 higher hazard rate than those +receiving from cadaveric donors (tx.type = 0).Since 1 is not in the 95% confidence interval, the result is +statistically significant.
++-Exercise 6
+Research shows that an important determinant of mortality after +kidney transplant is the age of the recipient. Fit a Cox model with age +as predictor and estimate the hazard ratio and its confidence interval. +Consider age first as continuous variable and then divide into +categories.
+++Continuous
++cox.age.rec <- coxph(Surv(follow.up, death) ~ age.rec, data = dat) +cox.age.rec |> summary()+## Call: +## coxph(formula = Surv(follow.up, death) ~ age.rec, data = dat) +## +## n= 9766, number of events= 464 +## (9 observations deleted due to missingness) +## +## coef exp(coef) se(coef) z Pr(>|z|) +## age.rec -0.042550 0.958342 0.008434 -5.045 4.53e-07 *** +## --- +## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 +## +## exp(coef) exp(-coef) lower .95 upper .95 +## age.rec 0.9583 1.043 0.9426 0.9743 +## +## Concordance= 0.586 (se = 0.016 ) +## Likelihood ratio test= 24.81 on 1 df, p=6e-07 +## Wald test = 25.45 on 1 df, p=5e-07 +## Score (logrank) test = 25.75 on 1 df, p=4e-07+-Categorical (Case 1)
++recode.age <- function(dat, ages) { + df$age.group <- sapply(df$follow.up, function(time) { + time.points[time <= time.points][1] + }) - exp(coef) exp(-coef) lower .95 upper .95 -tx.type 1.905 0.5248 1.58 2.298 + df +}-dat.age.rec <- dat[!is.na(dat$age.rec), ] +class.age <- function(age) { + if (age < 2) { + return("0-1") + } else if (age < 6) { + return("2-5") + } else if (age < 11) { + return("6-10") + } else if (age < 19) { + return("11-18") + } +} +dat.age.rec$age.group <- sapply(dat.age.rec$age.rec, class.age) |> + factor(levels = c( + "0-1", + "2-5", + "6-10", + "11-18" + )) -Concordance= 0.586 (se = 0.012 ) -Likelihood ratio test= 47.1 on 1 df, p=7e-12 -Wald test = 45.5 on 1 df, p=2e-11 -Score (logrank) test = 47.09 on 1 df, p=7e-12---log.hazard.ratio.hat <- cox.tx$coefficient -hazard.ratio.hat <- log.hazard.ratio.hat |> exp()---log.hazard.ratio.se <- summary(cox.tx)$coefficients[3] -log.hazard.ratio.hat.lower.bound <- log.hazard.ratio.hat - qnorm(0.975) * log.hazard.ratio.se -log.hazard.ratio.hat.upper.bound <- log.hazard.ratio.hat + qnorm(0.975) * log.hazard.ratio.se - -c(log.hazard.ratio.hat.lower.bound, log.hazard.ratio.hat.upper.bound) |> exp()---tx.type tx.type -1.579899 2.297932-- - -- ---Note ----Patients receiving kidney transplants from living donors (
-tx.type = 1) has 1.91 higher hazard rate than those receiving from cadaveric donors (tx.type = 0).Since 1 is not in the 95% confidence interval, the result is statistically significant.
-- -2 Exercise 6
-Research shows that an important determinant of mortality after kidney transplant is the age of the recipient. Fit a Cox model with age as predictor and estimate the hazard ratio and its confidence interval. Consider age first as continuous variable and then divide into categories.
-- -2.1 Continuous
----cox.age.rec <- coxph(Surv(follow.up, death) ~ age.rec, data = dat) -cox.age.rec |> summary()-+cox.age.rec <- coxph(Surv(follow.up, death) ~ age.group, data = dat.age.rec) +Call: -coxph(formula = Surv(follow.up, death) ~ age.rec, data = dat) - n= 9766, number of events= 464 - (9 observations deleted due to missingness) - - coef exp(coef) se(coef) z Pr(>|z|) -age.rec -0.042550 0.958342 0.008434 -5.045 4.53e-07 *** ---- -Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 - - exp(coef) exp(-coef) lower .95 upper .95 -age.rec 0.9583 1.043 0.9426 0.9743 - -Concordance= 0.586 (se = 0.016 ) -Likelihood ratio test= 24.81 on 1 df, p=6e-07 -Wald test = 25.45 on 1 df, p=5e-07 -Score (logrank) test = 25.75 on 1 df, p=4e-07+emmeans(cox.age.rec, pairwise ~ age.group, type = "response")## $emmeans +## age.group response SE df asymp.LCL asymp.UCL +## 0-1 1.000 0.0000 Inf 1.000 1.000 +## 2-5 0.552 0.0975 Inf 0.390 0.780 +## 6-10 0.347 0.0617 Inf 0.245 0.492 +## 11-18 0.361 0.0546 Inf 0.268 0.485 +## +## Confidence level used: 0.95 +## Intervals are back-transformed from the log scale +## +## $contrasts +## contrast ratio SE df null z.ratio p.value +## (0-1) / (2-5) 1.813 0.320 Inf 1 3.366 0.0043 +## (0-1) / (6-10) 2.880 0.511 Inf 1 5.956 <.0001 +## (0-1) / (11-18) 2.771 0.419 Inf 1 6.736 <.0001 +## (2-5) / (6-10) 1.589 0.251 Inf 1 2.928 0.0179 +## (2-5) / (11-18) 1.529 0.196 Inf 1 3.314 0.0051 +## (6-10) / (11-18) 0.962 0.124 Inf 1 -0.298 0.9908 +## +## P value adjustment: tukey method for comparing a family of 4 estimates +## Tests are performed on the log scale- -2.2 Categorical (Case 1)
----recode.age <- function(dat, ages) { - df$age.group <- sapply(df$follow.up, function(time) { - time.points[time <= time.points][1] - }) - - df -}---dat.age.rec <- dat[!is.na(dat$age.rec), ] -class.age <- function(age) { - if (age < 2) { - return("0,1") - } else if (age < 6) { - return("2,3,4,5") - } else if (age < 11) { - return("6,7,8,9,10") - } else if (age < 19) { - return("11-18") - } -}---dat.age.rec$age.group <- sapply(dat.age.rec$age.rec, class.age) |> - factor(levels = c( - "0,1", - "2,3,4,5", - "6,7,8,9,10", - "11-18" - )) - - -cox.age.rec <- coxph(Surv(follow.up, death) ~ age.group, data = dat.age.rec) -cox.age.rec |> summary()---Call: -coxph(formula = Surv(follow.up, death) ~ age.group, data = dat.age.rec) - - n= 9766, number of events= 464 - - coef exp(coef) se(coef) z Pr(>|z|) -age.group2,3,4,5 -0.5947 0.5517 0.1767 -3.366 0.000764 *** -age.group6,7,8,9,10 -1.0578 0.3472 0.1776 -5.956 2.59e-09 *** -age.group11-18 -1.0192 0.3609 0.1513 -6.736 1.63e-11 *** ---- -Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 - - exp(coef) exp(-coef) lower .95 upper .95 -age.group2,3,4,5 0.5517 1.813 0.3902 0.7801 -age.group6,7,8,9,10 0.3472 2.880 0.2452 0.4918 -age.group11-18 0.3609 2.771 0.2683 0.4855 - -Concordance= 0.584 (se = 0.015 ) -Likelihood ratio test= 44.93 on 3 df, p=1e-09 -Wald test = 53.97 on 3 df, p=1e-11 -Score (logrank) test = 57.87 on 3 df, p=2e-12---emmeans(cox.age.rec, pairwise ~ age.group, type = "response")---$emmeans - age.group response SE df asymp.LCL asymp.UCL - 0,1 1.000 0.0000 Inf 1.000 1.000 - 2,3,4,5 0.552 0.0975 Inf 0.390 0.780 - 6,7,8,9,10 0.347 0.0617 Inf 0.245 0.492 - 11-18 0.361 0.0546 Inf 0.268 0.485 - -Confidence level used: 0.95 -Intervals are back-transformed from the log scale - -$contrasts - contrast ratio SE df null z.ratio p.value - 0,1 / 2,3,4,5 1.813 0.320 Inf 1 3.366 0.0043 - 0,1 / 6,7,8,9,10 2.880 0.511 Inf 1 5.956 <.0001 - 0,1 / (11-18) 2.771 0.419 Inf 1 6.736 <.0001 - 2,3,4,5 / 6,7,8,9,10 1.589 0.251 Inf 1 2.928 0.0179 - 2,3,4,5 / (11-18) 1.529 0.196 Inf 1 3.314 0.0051 - 6,7,8,9,10 / (11-18) 0.962 0.124 Inf 1 -0.298 0.9908 - -P value adjustment: tukey method for comparing a family of 4 estimates -Tests are performed on the log scale- 3 Exercise 7
-Fit a multivariate Cox model by using other predictors and describe your results.
----sapply(names(dat), function(col) { - is.na(dat[, col]) |> sum() -})---hla.match age.donor age.rec cold.isc death year sex tx.type - 234 113 9 2250 0 0 0 0 -follow.up - 0---names(dat)---[1] "hla.match" "age.donor" "age.rec" "cold.isc" "death" "year" -[7] "sex" "tx.type" "follow.up"---Waldtest <- function(cox.fit, i.betas){ - q <- length(i.betas) - coefs <- cox.fit$coefficients - # print(cox.fit$var) - var <- cox.fit$var[i.betas, i.betas] - Wald <- coefs[i.betas]%*%solve(var)%*%coefs[i.betas] - p.value <- 1-pchisq(as.numeric(Wald), df = q) - return(cbind(Wald, p.value)) -}-+-fit.cox.model <- function(surv, i.betas) { - fit.tx <- coxph(surv, data = dat) - Waldtest(fit.tx, i.betas) -}++Exercise 7
+Fit a multivariate Cox model by using other predictors and describe +your results.
++sapply(names(dat), function(col) { + is.na(dat[, col]) |> sum() +})+## hla.match age.donor age.rec cold.isc death year sex tx.type +## 234 113 9 2250 0 0 0 0 +## follow.up +## 0+names(dat)+## [1] "hla.match" "age.donor" "age.rec" "cold.isc" "death" "year" +## [7] "sex" "tx.type" "follow.up"+Waldtest <- function(cox.fit, i.betas){ + q <- length(i.betas) + coefs <- cox.fit$coefficients + # print(cox.fit$var) + var <- cox.fit$var[i.betas, i.betas] + Wald <- coefs[i.betas]%*%solve(var)%*%coefs[i.betas] + p.value <- 1-pchisq(as.numeric(Wald), df = q) + return(cbind(Wald, p.value)) +}fit.cox.model <- function(surv, i.betas) { + fit.tx <- coxph(surv, data = dat) + Waldtest(fit.tx, i.betas) +}Start with
-tx.type-+-surv <- Surv(follow.up, death) ~ tx.type + age.rec -fit.cox.model(surv, 1:2) # NOTE: Why is this 1???---p.value -[1,] 77.3778 0+surv <- Surv(follow.up, death) ~ tx.type + age.rec +fit.cox.model(surv, 1:2) # NOTE: Why is this 1???## p.value +## [1,] 77.3778 0Iteration 1:
-tx.type + age + ?---names(dat)---[1] "hla.match" "age.donor" "age.rec" "cold.isc" "death" "year" -[7] "sex" "tx.type" "follow.up"-+-surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match -fit.cox.model(surv, 3)---p.value -[1,] 8.146717 0.004313919-surv <- Surv(follow.up, death) ~ tx.type + age.rec + age.donor -fit.cox.model(surv, 3)---p.value -[1,] 6.509223 0.01073164-surv <- Surv(follow.up, death) ~ tx.type + age.rec + cold.isc -fit.cox.model(surv, 3)---p.value -[1,] 0.7057259 0.4008664-surv <- Surv(follow.up, death) ~ tx.type + age.rec + year -fit.cox.model(surv, 3)---p.value -[1,] 4.680593 0.03050521-surv <- Surv(follow.up, death) ~ tx.type + age.rec + sex -fit.cox.model(surv, 3)---p.value -[1,] 1.921176 0.1657271+names(dat)+## [1] "hla.match" "age.donor" "age.rec" "cold.isc" "death" "year" +## [7] "sex" "tx.type" "follow.up"+surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match +fit.cox.model(surv, 3)+## p.value +## [1,] 8.146717 0.004313919+surv <- Surv(follow.up, death) ~ tx.type + age.rec + age.donor +fit.cox.model(surv, 3)+## p.value +## [1,] 6.509223 0.01073164+surv <- Surv(follow.up, death) ~ tx.type + age.rec + cold.isc +fit.cox.model(surv, 3)+## p.value +## [1,] 0.7057259 0.4008664+surv <- Surv(follow.up, death) ~ tx.type + age.rec + year +fit.cox.model(surv, 3)+## p.value +## [1,] 4.680593 0.03050521+surv <- Surv(follow.up, death) ~ tx.type + age.rec + sex +fit.cox.model(surv, 3)## p.value +## [1,] 1.921176 0.1657271Add
hla.matchIteration 2:
-tx.type + age.rec + hla.match + ?-+-surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match + age.donor -fit.cox.model(surv, 4)---p.value -[1,] 5.211567 0.0224371-surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match + cold.isc -fit.cox.model(surv, 4)---p.value -[1,] 0.9157965 0.3385811-surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match + year -fit.cox.model(surv, 4)---p.value -[1,] 4.94216 0.02620927-surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match + sex -fit.cox.model(surv, 4)---p.value -[1,] 0.981911 0.3217275+surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match + age.donor +fit.cox.model(surv, 4)+## p.value +## [1,] 5.211567 0.0224371+surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match + cold.isc +fit.cox.model(surv, 4)+## p.value +## [1,] 0.9157965 0.3385811+surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match + year +fit.cox.model(surv, 4)+## p.value +## [1,] 4.94216 0.02620927+surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match + sex +fit.cox.model(surv, 4)## p.value +## [1,] 0.981911 0.3217275Add
-age.donorIteration 2:
-tx.type + age.rec + hla.match + age.donor + ?-+-surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match + age.donor + - cold.isc -fit.cox.model(surv, 5)---p.value -[1,] 0.4450261 0.5047065-surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match + age.donor + - year -fit.cox.model(surv, 5)---p.value -[1,] 4.367751 0.03662531-surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match + age.donor + - sex -fit.cox.model(surv, 5)---p.value -[1,] 0.9343691 0.3337302Iteration 2: +
+tx.type + age.rec + hla.match + age.donor + ?+surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match + age.donor + + cold.isc +fit.cox.model(surv, 5)+## p.value +## [1,] 0.4450261 0.5047065+surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match + age.donor + + year +fit.cox.model(surv, 5)+## p.value +## [1,] 4.367751 0.03662531+surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match + age.donor + + sex +fit.cox.model(surv, 5)## p.value +## [1,] 0.9343691 0.3337302Add
-year-+-surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match + age.donor + - year + cold.isc -fit.cox.model(surv, 6)---p.value -[1,] 0.1614349 0.6878388-surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match + age.donor + - year + sex -fit.cox.model(surv, 6)---p.value -[1,] 0.9641705 0.3261383+surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match + age.donor + + year + cold.isc +fit.cox.model(surv, 6)+## p.value +## [1,] 0.1614349 0.6878388+surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match + age.donor + + year + sex +fit.cox.model(surv, 6)## p.value +## [1,] 0.9641705 0.3261383Next, we test for each remaining variables
- -- 4 Exercise 8
-Estimate the survival function for specific covariate patterns. Based on the previous results choose the best predictors.
-Exercise 9 — Check the proportional hazards assumption. You may use the function cox.zph. Discuss the result and possible implications.
+++}); + + + + + - - \ No newline at end of file + + diff --git a/slides.Rmd b/slides.Rmd index 8666997..75017a5 100644 --- a/slides.Rmd +++ b/slides.Rmd @@ -21,6 +21,7 @@ library(gt) library(ggsurvfit) library(gridExtra) library(survminer) +library(broom) ``` @@ -32,9 +33,9 @@ dat <- dat |> mutate( sex = factor(sex, levels = c(0,1), labels = c("Female","Male")), tx.type = factor(tx.type, levels = c(0,1), labels = c("Cadaveric","Living")), - hla.match = factor(hla.match), - year = factor(year) - ) + # hla.match = factor(hla.match), + # year = factor(year) +) ``` ```{r} @@ -244,11 +245,11 @@ plot.tx.donor # Overall Kaplan-Meier Curve ```{r} -km_all <- survfit(Surv(follow.up, death) ~ 1, data = dat) +km.all <- survfit(Surv(follow.up, death) ~ 1, data = dat) ``` ```{r, fig.width=7, fig.height=5} -km_all |> +km.all |> ggsurvfit(type = "survival") + add_confidence_interval() + scale_y_continuous(limits = c(0, 1)) + @@ -261,7 +262,74 @@ km_all |> ``` ```{r} -summary(km_all, times = c(0, 4, 8, 12)) +header_bg <- "#A83262" # burgundy/magenta from right panel +header_text <- "white" +body_bg <- "#FFF4CC" # light cream +stripe_bg <- "#F6E8C3" # slightly darker cream +text_color <- "#333333" +border_col <- "#D8C8A5" +``` + +```{r} +times <- seq(0, 12, 2) +summary(km.all, times = times)[c("time", "n.risk", "surv")] |> + as.data.frame() |> + gt() |> + tab_header( + title = "Overall Kaplan-Meier Survival Summary" + ) |> + cols_label( + time = "Follow-up Time", + n.risk = "Number at Risk", + surv = "Survival Probability" + ) |> + fmt_number( + columns = surv, + decimals = 3 + ) |> + tab_options( + table.font.size = px(15), + heading.title.font.size = px(20), + column_labels.font.weight = "bold" + ) + +``` + +```{r} +summary(km.all, times = times)[c("time", "n.risk", "surv")] |> + as.data.frame() |> + gt() |> + tab_header( + title = "Overall Kaplan-Meier Survival Summary" + ) |> + cols_label( + time = "Follow-up Time", + n.risk = "Number at Risk", + surv = "Survival Probability" + ) |> + fmt_number(columns = surv, decimals = 3) |> + fmt_number(columns = time, decimals = 0) |> + tab_style( + style = list( + cell_fill(color = "#A83262"), + cell_text(color = "white", weight = "bold") + ), + locations = cells_column_labels() + ) |> + tab_style( + style = cell_fill(color = "#FFF4CC"), + locations = cells_body() + ) |> + opt_row_striping(row_striping = TRUE) |> + tab_options( + row.striping.background_color = "#F6E8C3", + table.border.top.color = "#A83262", + table.border.bottom.color = "#A83262", + column_labels.border.bottom.color = "#A83262", + table.font.size = px(16), + heading.title.font.size = px(18), + table.background.color = "#FFF4CC" + ) ``` # Cox Model (with intervals?) @@ -434,29 +502,229 @@ eme.age.rec.group$contrast |> # Identifying Predictors -TODO: write a more generic function +```{r} +# From exercise 9 +Waldtest <- function(coxph.object, idx) { + b <- coxph.object$coef[idx] + Sig <- coxph.object$var[idx, idx] + Wald <- b %*% solve(Sig) %*% b + p <- 1 - pchisq(as.numeric(Wald), df = length(idx)) + return(data.frame(Wald = Wald, df = length(idx), p = p)) +} + +test.new.var <- function(old.vars, new.var, i=1) { + new.surv <- reformulate( + termlabels = c(old.vars, new.var), + response = "Surv(follow.up, death)" + ) + cox.fit <- coxph(new.surv, data = dat, method = "breslow") + print(cox.fit) + Waldtest(cox.fit, i) +} +``` + +## Start with nothing + +```{r, echo=TRUE} +old.vars <- c() +test.new.var(old.vars, "tx.type", 1) +test.new.var(old.vars, "hla.match", 1) +test.new.var(old.vars, "age.donor", 1) +test.new.var(old.vars, "age.rec", 1) +test.new.var(old.vars, "cold.isc", 1) +test.new.var(old.vars, "sex", 1) +test.new.var(old.vars, "year", 1) +``` +## `tx.type` + ? + +```{r, echo=TRUE} +old.vars <- c("tx.type") + +# test.new.var(old.vars, "hla.match", 2:7) +test.new.var(old.vars, "hla.match", 2) +test.new.var(old.vars, "age.donor", 2) +test.new.var(old.vars, "age.rec", 2) +test.new.var(old.vars, "cold.isc", 2) +test.new.var(old.vars, "sex", 2) +test.new.var(old.vars, "year", 2) +``` + +`age.rec` has the smallest $p$-value + +## `tx.type` + `age.rec` + ? + +```{r, echo=TRUE} +old.vars <- c("tx.type", "age.rec") + +test.new.var(old.vars, "hla.match", 3) +test.new.var(old.vars, "age.donor", 3) +test.new.var(old.vars, "cold.isc", 3) +test.new.var(old.vars, "sex", 3) +test.new.var(old.vars, "year", 3) +``` + +`hla.match` has the smallest $p$-value + +## `tx.type` + `age.rec` + `hla.match` + ? + +```{r, echo=TRUE} +old.vars <- c("tx.type", "age.rec", "hla.match") + +test.new.var(old.vars, "age.donor", 4) +test.new.var(old.vars, "cold.isc", 4) +test.new.var(old.vars, "sex", 4) +test.new.var(old.vars, "year", 4) +``` + +`age.donor` has the smallest $p$-value + +## `tx.type` + `age.rec` + `hla.match` + `age.donor` + ? + +```{r, echo=TRUE} +old.vars <- c("tx.type", "age.rec", "hla.match", "age.donor") + +test.new.var(old.vars, "cold.isc", 5) +test.new.var(old.vars, "sex", 5) +test.new.var(old.vars, "year", 5) +``` + +`year` has the smallest $p$-value + +## `tx.type` + `age.rec` + `hla.match` + `age.donor` + `year` + +```{r, echo=TRUE} +old.vars <- c("tx.type", "age.rec", "hla.match", "age.donor", "year") + +test.new.var(old.vars, "cold.isc", 6) +test.new.var(old.vars, "sex", 6) +``` + +Neither has $p$-value less than 0.05. Stop. + +# Full Model ```{r} +surv.full <- Surv(follow.up + death) ~ tx.type + age.rec + hla.match + + age.donor + year +cox.full <- coxph(surv.full, data = dat, method = "breslow") +``` + +```{r, echo=TRUE} +cox.full +``` + +```{r, fig.width=7, fig.height=5} +tidy(cox.full, exponentiate = TRUE, conf.int = TRUE) |> + mutate( + term = recode( + term, + "tx.typeLiving" = "Living donor", + "age.rec" = "Recipient age", + "hla.match" = "HLA match", + "age.donor" = "Donor age", + "year" = "Transplant year" + ) + ) |> + ggplot(aes(x = estimate, y = reorder(term, estimate))) + + geom_vline(xintercept = 1, linetype = "dashed") + + geom_errorbarh( + aes(xmin = conf.low, xmax = conf.high), + height = 0.2 + ) + + scale_x_log10() + + geom_point(size = 2) + + labs( + title = "Hazard Ratios from Full Cox Model", + x = "Hazard Ratio", + y = NULL + ) + + my.theme +``` + +## What happened to `year`? + +```{r} +ggplot(data = dat) + + geom_boxplot(aes(x = factor(year), y = follow.up)) +``` + +```{r} +ggplot(data = dat) + + geom_boxplot(aes(x = factor(year), y = follow.up)) + + facet_grid(death ~ .) +``` + +Probabily want to omit it + +## Without `year` + +```{r} +surv.full.1 <- Surv(follow.up + death) ~ tx.type + age.rec + hla.match + + age.donor +cox.full.1 <- coxph(surv.full.1, data = dat, method = "breslow") +``` + +```{r, echo=TRUE} +cox.full.1 +``` + +```{r, fig.width=7, fig.height=5} +tidy(cox.full.1, exponentiate = TRUE, conf.int = TRUE) |> + mutate( + term = recode( + term, + "tx.typeLiving" = "Living donor", + "age.rec" = "Recipient age", + "hla.match" = "HLA match", + "age.donor" = "Donor age", + ) + ) |> + ggplot(aes(x = estimate, y = reorder(term, estimate))) + + geom_vline(xintercept = 1, linetype = "dashed") + + geom_errorbarh( + aes(xmin = conf.low, xmax = conf.high), + height = 0.2 + ) + + scale_x_log10() + + geom_point(size = 2) + + labs( + title = "Hazard Ratios from Full Cox Model (Without Year)", + x = "Hazard Ratio", + y = NULL + ) + + my.theme +``` + + + + + + + + +--- + +TODO: write a more generic function (UPDATE: FAILED) + +Exercise 8
+Estimate the survival function for specific covariate patterns. Based +on the previous results choose the best predictors.
+Exercise 9 — Check the proportional hazards assumption. You may use +the function cox.zph. Discuss the result and possible implications.
H0: PH-assumption holds H1: PH-assumption doesn’t hold
----# different cox model!! -cox.final <- coxph(Surv(follow.up, death) ~ tx.type + age.rec + hla.match + age.donor, data = dat) -cox.zph(cox.final)--+chisq df p -tx.type 2.56 1 0.10948 -age.rec 35.98 1 2.0e-09 -hla.match 8.22 1 0.00414 -age.donor 11.05 1 0.00089 -GLOBAL 47.37 4 1.3e-09+# different cox model!! +cox.final <- coxph(Surv(follow.up, death) ~ tx.type + age.rec + hla.match + age.donor, data = dat) +cox.zph(cox.final)+## chisq df p +## tx.type 2.56 1 0.10948 +## age.rec 35.98 1 2.0e-09 +## hla.match 8.22 1 0.00414 +## age.donor 11.05 1 0.00089 +## GLOBAL 47.37 4 1.3e-09+cox.final+## Call: +## coxph(formula = Surv(follow.up, death) ~ tx.type + age.rec + +## hla.match + age.donor, data = dat) +## +## coef exp(coef) se(coef) z p +## tx.type 0.408215 1.504130 0.121728 3.354 0.000798 +## age.rec -0.043724 0.957218 0.008875 -4.927 8.36e-07 +## hla.match -0.107323 0.898236 0.040127 -2.675 0.007483 +## age.donor -0.008810 0.991228 0.003859 -2.283 0.022437 +## +## Likelihood ratio test=85.48 on 4 df, p=< 2.2e-16 +## n= 9433, number of events= 449 +## (342 observations deleted due to missingness)Exercise 10 — Plot the Schoenfeld residuals and comment. A non-random +pattern or slope in a plot of scaled residuals against time indicates a +violation, suggesting the covariate’s impact changes, while a horizontal +line around zero supports the assumption.
++plot(cox.zph(cox.final))+
+plot(cox.zph(cox.final))+
NOTES: left censoring: event happened before time zero -> not +applicable left truncation: not applicable?? right censoring: we saw +this in the follow up times, administrative censoring high
+! No left truncation, since our target population is patients who +received a kidney transplant. So we don’t care about patients who died +before receiving a transplant.
+! ADD: we used a Cox model but we already saw that the PH assumption +doesn’t hold !
+? remove year from prediction model, since it is not a patient +characteristic and may not be stable for future predictions. It mainly +captures changes in clinical practice rather than individual risk.
Exercise 10 — Plot the Schoenfeld residuals and comment. A non-random pattern or slope in a plot of scaled residuals against time indicates a violation, suggesting the covariate’s impact changes, while a horizontal line around zero supports the assumption.
----plot(cox.zph(cox.final))----- --
----- --
----- --
----- --
NOTES: left censoring: event happened before time zero -> not applicable left truncation: not applicable?? right censoring: we saw this in the follow up times, administrative censoring high
-! No left truncation, since our target population is patients who received a kidney transplant. So we don’t care about patients who died before receiving a transplant.
-! ADD: we used a Cox model but we already saw that the PH assumption doesn’t hold !
-? remove year from prediction model, since it is not a patient characteristic and may not be stable for future predictions. It mainly captures changes in clinical practice rather than individual risk.
- - - - + + + + -+ +```{r, eval=FALSE} +# WARNING: BAD, DO NOT USE. +vars <- c("hla.match", "age.donor", "age.rec", "cold.isc", "year", "sex", + "tx.type") +included <- vars == "tx.type" + Waldtest <- function(cox.fit, i.betas){ q <- length(i.betas) coefs <- cox.fit$coefficients var <- cox.fit$var[i.betas, i.betas] Wald <- coefs[i.betas]%*%solve(var)%*%coefs[i.betas] p.value <- 1-pchisq(as.numeric(Wald), df = q) - print(p.value) return(p.value) } -``` -```{r} -vars <- c("hla.match", "age.donor", "age.rec", "cold.isc", "year", "sex", - "tx.type") -included <- vars == "tx.type" -included <- c(FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, TRUE) -included <- c(FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, TRUE) -find.best.surv <- function(included) { - print('---') +find.model.vars <- function(included) { test.vars <- vars[!included] p.values <- sapply(test.vars, function(test.var) { new.surv <- reformulate( @@ -464,31 +732,24 @@ find.best.surv <- function(included) { response = "Surv(follow.up, death)" ) fit.tx <- coxph(new.surv, data = dat) - Waldtest(fit.tx, sum(included)) + Waldtest(fit.tx, 1) }) - if (sum(p.values > 0.05) == sum(included)) { - print(3) - return(included) - } - if (sum(included == TRUE) == length(included)) { - print(2) + if (all(p.values > 0.05)) { return(included) } - print(1) - print(which.min(p.values)) - print(included) - print(p.values) - print(which.min(p.values)) - included[which.min(p.values)] <- TRUE - print(included) - # find.best.surv(included) + new.var <- names(p.values)[which.min(p.values)] + included[which(vars == new.var)] <- TRUE + return(find.model.vars(included)) } - -find.best.surv(included) +included <- find.model.vars(included) +vars[included] ``` ++ + ```{r} # surv <- Surv(follow.up, death) ~ tx.type + age.rec + hla.match # fit.cox.model(surv, 3) diff --git a/slides.html b/slides.html index 260d904..1cc2c98 100644 --- a/slides.html +++ b/slides.html @@ -127,37 +127,90 @@ window.Quarto = { -- 1 Introduction: research question
-- 1.1 Survival after transplantation (M)
++++ +++Warning ++++There are 25 repondents with
+follow.up = 0, 21 of which withdeath = 0and other 4 withdeath = 1.+++dat[dat$follow.up == 0, ] |> + group_by(death) |> + summarise(n = n())+++# A tibble: 2 × 2 + death n + <int> <int> +1 0 21 +2 1 4Reasons to omit them, that is, have
+dat <- dat[dat$follow.up > 0,]at the very beginning:+
+- If
+death = 1, transplant failed. They shouldn’t be included in the research question “survival rate of a successful tranplant”.- If
+death = 0, transplant was successful, but they were immediately removed from populatoin at risk, contributiong nothing to the analysis.+ -1 Title
- 1.2 Identify Predictors (M)
++ -2 Table of Contents (?)
- 1.3 Why Survival Analysis (L)
-Motivation: study the distribution of time to event \(T\).
-Example: time of death after kidney transplant.
++ +3 Introduction (M)
++ +4 Method & Result
++ +4.1 Identify Predictors (M)
++
+- Data source
+- Study population
++++++[1] 9775+
+- Covariates
+- Outcome
++++++[1] "hla.match" "age.donor" "age.rec" "cold.isc" "death" "year" +[7] "sex" "tx.type" "follow.up"+ +4.2 Table (L)
+-+- + +++
+ ++ + ++ + + + + + + +Characteristic +Overall +
+N = 9,775Cadaveric +
+N = 5,148Living +
+N = 4,627+ +Sex, n(%) ++
++
++
++ +Female +4,014 (41%) +2,105 (41%) +1,909 (41%) ++ +Male +5,761 (59%) +3,043 (59%) +2,718 (59%) ++ +Recipient age, median (IQR) +13 (8, 16) +13 (7, 16) +14 (9, 16) ++ +Unknown +9 +7 +2 ++ +Donor age, median (IQR) +33 (21, 41) +37 (31, 42) +22 (15, 37) ++ +Unknown +113 +112 +1 ++ +HLA matches, n(%) ++
++
++
++ +0 +784 (8.2%) +125 (2.5%) +659 (14%) ++ +1 +1,504 (16%) +160 (3.2%) +1,344 (29%) ++ +2 +1,553 (16%) +228 (4.6%) +1,325 (29%) ++ +3 +3,778 (40%) +3,039 (61%) +739 (16%) ++ +4 +1,278 (13%) +1,007 (20%) +271 (5.9%) ++ +5 +365 (3.8%) +241 (4.9%) +124 (2.7%) ++ +6 +279 (2.9%) +153 (3.1%) +126 (2.7%) ++ +Unknown +234 +195 +39 ++ +Cold ischemic time (hours), median (IQR), +7 (1, 19) +1 (0, 1) +19 (13, 25) ++ + +Unknown +2,250 +1,490 +760 ++ +5 Elementary Analysis: Transplant Type (L)
++ +5.1 Transplant Type vs Sex
++
sexare similar acrosstx.type+++++++ ++
+ +5.2 Transplant Type vs Recipient Age
++++++++ ++
+ +6 Elementary Analysis: Transplant Type (Cont.)
++ +6.1 Transplant Type vs HLA Match
+Living donors have less
+hla.matchthen cadaveric donors.+++++++ ++
+ +6.2 Transplant Type vs Donor Age
++++++++ ++
+ +7 Overall Kaplan-Meier Curve
++++++++ ++
+++++ + +++ +
+ ++ +Overall Kaplan-Meier Survival Summary ++ + + +Follow-up Time +Number at Risk +Survival Probability ++ +0 +9775 +1.000 ++ +2 +6451 +0.968 ++ +4 +4150 +0.953 ++ +6 +2398 +0.937 ++ +8 +1197 +0.919 ++ +10 +492 +0.901 ++ + +12 +48 +0.888 ++++++ + +++ +
+ ++ +Overall Kaplan-Meier Survival Summary ++ + + +Follow-up Time +Number at Risk +Survival Probability ++ +0 +9775 +1.000 ++ +2 +6451 +0.968 ++ +4 +4150 +0.953 ++ +6 +2398 +0.937 ++ +8 +1197 +0.919 ++ +10 +492 +0.901 ++ + +12 +48 +0.888 ++ +8 Cox Model (with intervals?)
++ +9 Cox Model: Transplant Type
++++++Call: +coxph(formula = Surv(follow.up, death) ~ tx.type, data = dat) + + n= 9775, number of events= 465 + + coef exp(coef) se(coef) z Pr(>|z|) +tx.typeLiving 0.64469 1.90539 0.09558 6.745 1.53e-11 *** +--- +Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 + + exp(coef) exp(-coef) lower .95 upper .95 +tx.typeLiving 1.905 0.5248 1.58 2.298 + +Concordance= 0.586 (se = 0.012 ) +Likelihood ratio test= 47.1 on 1 df, p=7e-12 +Wald test = 45.5 on 1 df, p=2e-11 +Score (logrank) test = 47.09 on 1 df, p=7e-12+++++++ ++
+++++Call: survfit(formula = Surv(follow.up, death) ~ tx.type, data = dat) + + tx.type=Cadaveric + time n.risk n.event survival std.err lower 95% CI upper 95% CI + 0 5148 0 1.000 0.00000 1.000 1.000 + 4 2297 130 0.967 0.00294 0.961 0.973 + 8 711 34 0.943 0.00514 0.933 0.953 + 12 27 13 0.908 0.01135 0.886 0.931 + + tx.type=Living + time n.risk n.event survival std.err lower 95% CI upper 95% CI + 0 4627 4 0.999 0.000432 0.998 1.000 + 4 1853 225 0.937 0.004212 0.928 0.945 + 8 486 52 0.891 0.007720 0.876 0.906 + 12 21 7 0.864 0.013257 0.838 0.890+ +10 Cox Model: Age Recipient (Continuous)
++++++Call: +coxph(formula = Surv(follow.up, death) ~ age.rec, data = dat) + + n= 9766, number of events= 464 + (9 observations deleted due to missingness) + + coef exp(coef) se(coef) z Pr(>|z|) +age.rec -0.042550 0.958342 0.008434 -5.045 4.53e-07 *** +--- +Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 + + exp(coef) exp(-coef) lower .95 upper .95 +age.rec 0.9583 1.043 0.9426 0.9743 + +Concordance= 0.586 (se = 0.016 ) +Likelihood ratio test= 24.81 on 1 df, p=6e-07 +Wald test = 25.45 on 1 df, p=5e-07 +Score (logrank) test = 25.75 on 1 df, p=4e-07+++++++ ++
+++View life table +
++++++Call: survfit(formula = Surv(follow.up, death) ~ age.rec, data = dat) + +9 observations deleted due to missingness + age.rec=0 + time n.risk n.event survival std.err lower 95% CI upper 95% CI + 0 93 1 0.989 0.0107 0.969 1.00 + 4 34 17 0.784 0.0460 0.699 0.88 + 8 13 0 0.784 0.0460 0.699 0.88 + + age.rec=1 + time n.risk n.event survival std.err lower 95% CI upper 95% CI + 0 394 0 1.000 0.0000 1.000 1.000 + 4 158 32 0.900 0.0173 0.866 0.934 + 8 60 3 0.876 0.0217 0.835 0.920 + 12 1 0 0.876 0.0217 0.835 0.920 + + age.rec=2 + time n.risk n.event survival std.err lower 95% CI upper 95% CI + 0 381 0 1.000 0.0000 1.000 1.000 + 4 161 26 0.917 0.0160 0.886 0.949 + 8 73 2 0.899 0.0204 0.859 0.939 + 12 1 2 0.860 0.0336 0.797 0.929 + + age.rec=3 + time n.risk n.event survival std.err lower 95% CI upper 95% CI + 0 277 0 1.000 0.0000 1.000 1.000 + 4 130 15 0.938 0.0157 0.907 0.969 + 8 44 2 0.915 0.0223 0.872 0.960 + + age.rec=4 + time n.risk n.event survival std.err lower 95% CI upper 95% CI + 0 279 0 1.000 0.0000 1.000 1.000 + 4 129 14 0.942 0.0151 0.913 0.972 + 8 44 5 0.895 0.0257 0.846 0.947 + 12 1 0 0.895 0.0257 0.846 0.947 + + age.rec=5 + time n.risk n.event survival std.err lower 95% CI upper 95% CI + 0 286 0 1.000 0.0000 1.000 1.000 + 4 133 10 0.955 0.0141 0.928 0.983 + 8 49 3 0.920 0.0251 0.872 0.971 + 12 4 1 0.898 0.0330 0.835 0.965 + + age.rec=6 + time n.risk n.event survival std.err lower 95% CI upper 95% CI + 0 302 0 1.000 0.0000 1.000 1.000 + 4 140 10 0.961 0.0123 0.937 0.985 + 8 53 0 0.961 0.0123 0.937 0.985 + 12 2 0 0.961 0.0123 0.937 0.985 + + age.rec=7 + time n.risk n.event survival std.err lower 95% CI upper 95% CI + 0 333 0 1.000 0.0000 1.000 1.000 + 4 162 14 0.948 0.0140 0.921 0.976 + 8 62 2 0.932 0.0177 0.898 0.967 + 12 3 2 0.859 0.0546 0.759 0.973 + + age.rec=8 + time n.risk n.event survival std.err lower 95% CI upper 95% CI + 0 371 0 1.000 0.0000 1.000 1.000 + 4 183 12 0.964 0.0102 0.944 0.984 + 8 72 3 0.935 0.0199 0.897 0.975 + 12 1 3 0.869 0.0416 0.791 0.954 + + age.rec=9 + time n.risk n.event survival std.err lower 95% CI upper 95% CI + 0 388 0 1.000 0.0000 1.000 1.000 + 4 171 12 0.953 0.0137 0.927 0.980 + 8 63 7 0.891 0.0268 0.840 0.945 + 12 5 0 0.891 0.0268 0.840 0.945 + + age.rec=10 + time n.risk n.event survival std.err lower 95% CI upper 95% CI + 0 430 1 0.998 0.00232 0.993 1.000 + 4 203 7 0.976 0.00876 0.959 0.993 + 8 69 4 0.943 0.01874 0.907 0.981 + 12 3 2 0.846 0.06716 0.725 0.989 + + age.rec=11 + time n.risk n.event survival std.err lower 95% CI upper 95% CI + 0 443 0 1.000 0.0000 1.000 1.000 + 4 198 8 0.976 0.0087 0.959 0.993 + 8 63 6 0.921 0.0242 0.875 0.970 + 12 4 1 0.881 0.0455 0.796 0.975 + + age.rec=12 + time n.risk n.event survival std.err lower 95% CI upper 95% CI + 0 541 1 0.998 0.00185 0.995 1.000 + 4 246 15 0.963 0.00943 0.944 0.981 + 8 69 5 0.933 0.01684 0.900 0.966 + 12 2 1 0.904 0.03300 0.841 0.971 + + age.rec=13 + time n.risk n.event survival std.err lower 95% CI upper 95% CI + 0 644 0 1.000 0.00000 1.000 1.000 + 4 282 22 0.954 0.00981 0.935 0.974 + 8 57 4 0.922 0.02042 0.883 0.963 + 12 4 2 0.872 0.04013 0.796 0.954 + + age.rec=14 + time n.risk n.event survival std.err lower 95% CI upper 95% CI + 0 685 0 1.000 0.00000 1.000 1.000 + 4 303 15 0.971 0.00753 0.957 0.986 + 8 59 4 0.945 0.01537 0.916 0.976 + 12 2 1 0.909 0.03860 0.837 0.988 + + age.rec=15 + time n.risk n.event survival std.err lower 95% CI upper 95% CI + 0 874 1 0.999 0.00114 0.997 1.000 + 4 336 36 0.945 0.00910 0.927 0.963 + 8 66 6 0.918 0.01432 0.890 0.946 + 12 3 0 0.918 0.01432 0.890 0.946 + + age.rec=16 + time n.risk n.event survival std.err lower 95% CI upper 95% CI + 0 935 0 1.000 0.0000 1.000 1.000 + 4 375 29 0.956 0.0082 0.941 0.973 + 8 86 8 0.924 0.0145 0.896 0.953 + 12 4 0 0.924 0.0145 0.896 0.953 + + age.rec=17 + time n.risk n.event survival std.err lower 95% CI upper 95% CI + 0 1073 0 1.000 0.00000 1.000 1.000 + 4 410 28 0.961 0.00753 0.946 0.976 + 8 90 16 0.889 0.02020 0.850 0.929 + 12 1 2 0.854 0.03175 0.794 0.918 + + age.rec=18 + time n.risk n.event survival std.err lower 95% CI upper 95% CI + 0 1037 0 1.000 0.00000 1.000 1.000 + 4 395 32 0.955 0.00802 0.940 0.971 + 8 104 6 0.931 0.01309 0.906 0.957 + 12 7 2 0.905 0.02210 0.863 0.949+ +11 Cox Model: Age Recipient (Categorical)
+++ ++cox.age.rec.group+++Call: +coxph(formula = Surv(follow.up, death) ~ age.rec.group, data = dat.age.rec.group) + + coef exp(coef) se(coef) z p +age.rec.group2-5 -0.5947 0.5517 0.1767 -3.366 0.000764 +age.rec.group6-10 -1.0578 0.3472 0.1776 -5.956 2.59e-09 +age.rec.group11-18 -1.0192 0.3609 0.1513 -6.736 1.63e-11 + +Likelihood ratio test=44.93 on 3 df, p=9.559e-10 +n= 9766, number of events= 464 + (9 observations deleted due to missingness)+++eme.age.rec.group <- emmeans(cox.age.rec.group, pairwise ~ age.rec.group, + type = "response") +eme.age.rec.group+++$emmeans + age.rec.group response SE df asymp.LCL asymp.UCL + 0-1 1.000 0.0000 Inf 1.000 1.000 + 2-5 0.552 0.0975 Inf 0.390 0.780 + 6-10 0.347 0.0617 Inf 0.245 0.492 + 11-18 0.361 0.0546 Inf 0.268 0.485 + +Confidence level used: 0.95 +Intervals are back-transformed from the log scale + +$contrasts + contrast ratio SE df null z.ratio p.value + (0-1) / (2-5) 1.813 0.320 Inf 1 3.366 0.0043 + (0-1) / (6-10) 2.880 0.511 Inf 1 5.956 <.0001 + (0-1) / (11-18) 2.771 0.419 Inf 1 6.736 <.0001 + (2-5) / (6-10) 1.589 0.251 Inf 1 2.928 0.0179 + (2-5) / (11-18) 1.529 0.196 Inf 1 3.314 0.0051 + (6-10) / (11-18) 0.962 0.124 Inf 1 -0.298 0.9908 + +P value adjustment: tukey method for comparing a family of 4 estimates +Tests are performed on the log scale+++++++ ++
+++++++ ++
+ +12 Identifying Predictors
++ +12.1 Start with nothing
++++old.vars <- c() +test.new.var(old.vars, "tx.type", 1)+++Call: +coxph(formula = new.surv, data = dat, method = "breslow") + + coef exp(coef) se(coef) z p +tx.typeLiving 0.64466 1.90534 0.09558 6.745 1.53e-11 + +Likelihood ratio test=47.1 on 1 df, p=6.742e-12 +n= 9775, number of events= 465+++Wald df p +1 45.49594 1 1.529554e-11+test.new.var(old.vars, "hla.match", 1)+++Call: +coxph(formula = new.surv, data = dat, method = "breslow") + + coef exp(coef) se(coef) z p +hla.match -0.2096 0.8109 0.0351 -5.972 2.34e-09 + +Likelihood ratio test=35.87 on 1 df, p=2.112e-09 +n= 9541, number of events= 451 + (234 observations deleted due to missingness)+++Wald df p +1 35.6703 1 2.336993e-09+test.new.var(old.vars, "age.donor", 1)+++Call: +coxph(formula = new.surv, data = dat, method = "breslow") + + coef exp(coef) se(coef) z p +age.donor -0.021738 0.978497 0.003467 -6.269 3.62e-10 + +Likelihood ratio test=38.96 on 1 df, p=4.319e-10 +n= 9662, number of events= 464 + (113 observations deleted due to missingness)+++Wald df p +1 39.30567 1 3.62387e-10+test.new.var(old.vars, "age.rec", 1)+++Call: +coxph(formula = new.surv, data = dat, method = "breslow") + + coef exp(coef) se(coef) z p +age.rec -0.042545 0.958348 0.008434 -5.045 4.54e-07 + +Likelihood ratio test=24.8 on 1 df, p=6.362e-07 +n= 9766, number of events= 464 + (9 observations deleted due to missingness)+++Wald df p +1 25.44856 1 4.543371e-07+test.new.var(old.vars, "cold.isc", 1)+++Call: +coxph(formula = new.surv, data = dat, method = "breslow") + + coef exp(coef) se(coef) z p +cold.isc 0.019094 1.019278 0.004052 4.712 2.45e-06 + +Likelihood ratio test=20.88 on 1 df, p=4.888e-06 +n= 7525, number of events= 357 + (2250 observations deleted due to missingness)+++Wald df p +1 22.20574 1 2.449402e-06+test.new.var(old.vars, "sex", 1)+++Call: +coxph(formula = new.surv, data = dat, method = "breslow") + + coef exp(coef) se(coef) z p +sexMale -0.08422 0.91923 0.09374 -0.898 0.369 + +Likelihood ratio test=0.8 on 1 df, p=0.37 +n= 9775, number of events= 465+++Wald df p +1 0.8072063 1 0.3689475+test.new.var(old.vars, "year", 1)+++Call: +coxph(formula = new.surv, data = dat, method = "breslow") + + coef exp(coef) se(coef) z p +year -0.03667 0.96399 0.01550 -2.366 0.018 + +Likelihood ratio test=5.62 on 1 df, p=0.0178 +n= 9775, number of events= 465+++Wald df p +1 5.595603 1 0.01800561+ +12.2
+tx.type+ ?+++old.vars <- c("tx.type") + +# test.new.var(old.vars, "hla.match", 2:7) +test.new.var(old.vars, "hla.match", 2)+++Call: +coxph(formula = new.surv, data = dat, method = "breslow") + + coef exp(coef) se(coef) z p +tx.typeLiving 0.45336 1.57359 0.11146 4.068 4.75e-05 +hla.match -0.12459 0.88286 0.03959 -3.147 0.00165 + +Likelihood ratio test=52.43 on 2 df, p=4.122e-12 +n= 9541, number of events= 451 + (234 observations deleted due to missingness)+++Wald df p +1 9.905243 1 0.001648086+test.new.var(old.vars, "age.donor", 2)+++Call: +coxph(formula = new.surv, data = dat, method = "breslow") + + coef exp(coef) se(coef) z p +tx.typeLiving 0.478191 1.613154 0.106660 4.483 7.35e-06 +age.donor -0.013523 0.986568 0.003745 -3.611 0.000305 + +Likelihood ratio test=59.05 on 2 df, p=1.506e-13 +n= 9662, number of events= 464 + (113 observations deleted due to missingness)+++Wald df p +1 13.0412 1 0.0003047131+test.new.var(old.vars, "age.rec", 2)+++Call: +coxph(formula = new.surv, data = dat, method = "breslow") + + coef exp(coef) se(coef) z p +tx.typeLiving 0.693775 2.001255 0.096157 7.215 5.39e-13 +age.rec -0.049081 0.952104 0.008536 -5.750 8.92e-09 + +Likelihood ratio test=78.72 on 2 df, p=< 2.2e-16 +n= 9766, number of events= 464 + (9 observations deleted due to missingness)+++Wald df p +1 33.06396 1 8.917646e-09+test.new.var(old.vars, "cold.isc", 2)+++Call: +coxph(formula = new.surv, data = dat, method = "breslow") + + coef exp(coef) se(coef) z p +tx.typeLiving 0.477550 1.612119 0.171044 2.792 0.00524 +cold.isc 0.004924 1.004937 0.006728 0.732 0.46420 + +Likelihood ratio test=28.72 on 2 df, p=5.811e-07 +n= 7525, number of events= 357 + (2250 observations deleted due to missingness)+++Wald df p +1 0.5357515 1 0.4641988+test.new.var(old.vars, "sex", 2)+++Call: +coxph(formula = new.surv, data = dat, method = "breslow") + + coef exp(coef) se(coef) z p +tx.typeLiving 0.64499 1.90597 0.09558 6.748 1.5e-11 +sexMale -0.08657 0.91707 0.09374 -0.924 0.356 + +Likelihood ratio test=47.95 on 2 df, p=3.87e-11 +n= 9775, number of events= 465+++Wald df p +1 0.8528929 1 0.3557352+test.new.var(old.vars, "year", 2)+++Call: +coxph(formula = new.surv, data = dat, method = "breslow") + + coef exp(coef) se(coef) z p +tx.typeLiving 0.63806 1.89281 0.09563 6.672 2.52e-11 +year -0.03310 0.96744 0.01548 -2.138 0.0325 + +Likelihood ratio test=51.69 on 2 df, p=5.967e-12 +n= 9775, number of events= 465+++Wald df p +1 4.573044 1 0.0324788+
age.rechas the smallest \(p\)-value+ +12.3
+tx.type+age.rec+ ?+++old.vars <- c("tx.type", "age.rec") + +test.new.var(old.vars, "hla.match", 3)+++Call: +coxph(formula = new.surv, data = dat, method = "breslow") + + coef exp(coef) se(coef) z p +tx.typeLiving 0.512664 1.669733 0.112875 4.542 5.58e-06 +age.rec -0.048268 0.952878 0.008688 -5.556 2.77e-08 +hla.match -0.113954 0.892299 0.039929 -2.854 0.00432 + +Likelihood ratio test=81.92 on 3 df, p=< 2.2e-16 +n= 9535, number of events= 450 + (240 observations deleted due to missingness)+++Wald df p +1 8.144605 1 0.004318947+test.new.var(old.vars, "age.donor", 3)+++Call: +coxph(formula = new.surv, data = dat, method = "breslow") + + coef exp(coef) se(coef) z p +tx.typeLiving 0.571325 1.770611 0.107377 5.321 1.03e-07 +age.rec -0.044085 0.956872 0.008718 -5.057 4.26e-07 +age.donor -0.009688 0.990359 0.003797 -2.551 0.0107 + +Likelihood ratio test=83.3 on 3 df, p=< 2.2e-16 +n= 9655, number of events= 463 + (120 observations deleted due to missingness)+++Wald df p +1 6.508773 1 0.01073435+test.new.var(old.vars, "cold.isc", 3)+++Call: +coxph(formula = new.surv, data = dat, method = "breslow") + + coef exp(coef) se(coef) z p +tx.typeLiving 0.489852 1.632075 0.170731 2.869 0.00412 +age.rec -0.025545 0.974779 0.010019 -2.550 0.01078 +cold.isc 0.005637 1.005653 0.006709 0.840 0.40085 + +Likelihood ratio test=35.04 on 3 df, p=1.194e-07 +n= 7519, number of events= 357 + (2256 observations deleted due to missingness)+++Wald df p +1 0.705761 1 0.4008547+test.new.var(old.vars, "sex", 3)+++Call: +coxph(formula = new.surv, data = dat, method = "breslow") + + coef exp(coef) se(coef) z p +tx.typeLiving 0.695122 2.003953 0.096164 7.229 4.88e-13 +age.rec -0.050004 0.951226 0.008569 -5.835 5.37e-09 +sexMale -0.130416 0.877730 0.094113 -1.386 0.166 + +Likelihood ratio test=80.63 on 3 df, p=< 2.2e-16 +n= 9766, number of events= 464 + (9 observations deleted due to missingness)+++Wald df p +1 1.920263 1 0.1658277+test.new.var(old.vars, "year", 3)+++Call: +coxph(formula = new.surv, data = dat, method = "breslow") + + coef exp(coef) se(coef) z p +tx.typeLiving 0.687661 1.989059 0.096215 7.147 8.86e-13 +age.rec -0.049029 0.952153 0.008537 -5.743 9.30e-09 +year -0.033478 0.967076 0.015475 -2.163 0.0305 + +Likelihood ratio test=83.42 on 3 df, p=< 2.2e-16 +n= 9766, number of events= 464 + (9 observations deleted due to missingness)+++Wald df p +1 4.680122 1 0.03051359+
hla.matchhas the smallest \(p\)-value+ +12.4
+tx.type+age.rec+hla.match+ ?+++old.vars <- c("tx.type", "age.rec", "hla.match") + +test.new.var(old.vars, "age.donor", 4)+++Call: +coxph(formula = new.surv, data = dat, method = "breslow") + + coef exp(coef) se(coef) z p +tx.typeLiving 0.408204 1.504113 0.121727 3.353 0.000798 +age.rec -0.043716 0.957225 0.008875 -4.926 8.39e-07 +hla.match -0.107306 0.898251 0.040127 -2.674 0.007491 +age.donor -0.008810 0.991229 0.003859 -2.283 0.022443 + +Likelihood ratio test=85.46 on 4 df, p=< 2.2e-16 +n= 9433, number of events= 449 + (342 observations deleted due to missingness)+++Wald df p +1 5.211086 1 0.02244331+test.new.var(old.vars, "cold.isc", 4)+++Call: +coxph(formula = new.surv, data = dat, method = "breslow") + + coef exp(coef) se(coef) z p +tx.typeLiving 0.349594 1.418492 0.182948 1.911 0.0560 +age.rec -0.023299 0.976970 0.010119 -2.302 0.0213 +hla.match -0.071073 0.931394 0.044036 -1.614 0.1065 +cold.isc 0.006428 1.006449 0.006717 0.957 0.3386 + +Likelihood ratio test=33.68 on 4 df, p=8.682e-07 +n= 7381, number of events= 352 + (2394 observations deleted due to missingness)+++Wald df p +1 0.9158816 1 0.3385587+test.new.var(old.vars, "sex", 4)+++Call: +coxph(formula = new.surv, data = dat, method = "breslow") + + coef exp(coef) se(coef) z p +tx.typeLiving 0.515100 1.673806 0.112902 4.562 5.06e-06 +age.rec -0.048944 0.952234 0.008721 -5.612 2.00e-08 +hla.match -0.112940 0.893204 0.039936 -2.828 0.00468 +sexMale -0.094882 0.909480 0.095783 -0.991 0.32188 + +Likelihood ratio test=82.9 on 4 df, p=< 2.2e-16 +n= 9535, number of events= 450 + (240 observations deleted due to missingness)+++Wald df p +1 0.9812753 1 0.3218842+test.new.var(old.vars, "year", 4)+++Call: +coxph(formula = new.surv, data = dat, method = "breslow") + + coef exp(coef) se(coef) z p +tx.typeLiving 0.494911 1.640353 0.113403 4.364 1.28e-05 +age.rec -0.048129 0.953011 0.008689 -5.539 3.04e-08 +hla.match -0.122559 0.884654 0.040341 -3.038 0.00238 +year -0.035190 0.965422 0.015831 -2.223 0.02622 + +Likelihood ratio test=86.88 on 4 df, p=< 2.2e-16 +n= 9535, number of events= 450 + (240 observations deleted due to missingness)+++Wald df p +1 4.941346 1 0.02622161+
age.donorhas the smallest \(p\)-value+ +12.5
+tx.type+age.rec+hla.match+age.donor+ ?+++old.vars <- c("tx.type", "age.rec", "hla.match", "age.donor") + +test.new.var(old.vars, "cold.isc", 5)+++Call: +coxph(formula = new.surv, data = dat, method = "breslow") + + coef exp(coef) se(coef) z p +tx.typeLiving 0.355582 1.427011 0.188970 1.882 0.0599 +age.rec -0.021491 0.978738 0.010298 -2.087 0.0369 +hla.match -0.067709 0.934532 0.044223 -1.531 0.1257 +age.donor -0.003508 0.996499 0.004230 -0.829 0.4069 +cold.isc 0.004519 1.004530 0.006774 0.667 0.5047 + +Likelihood ratio test=34.04 on 5 df, p=2.333e-06 +n= 7347, number of events= 351 + (2428 observations deleted due to missingness)+++Wald df p +1 0.445129 1 0.5046572+test.new.var(old.vars, "sex", 5)+++Call: +coxph(formula = new.surv, data = dat, method = "breslow") + + coef exp(coef) se(coef) z p +tx.typeLiving 0.411943 1.509748 0.121783 3.383 0.000718 +age.rec -0.044462 0.956512 0.008914 -4.988 6.1e-07 +hla.match -0.106379 0.899084 0.040136 -2.650 0.008038 +age.donor -0.008675 0.991362 0.003862 -2.246 0.024677 +sexMale -0.092710 0.911458 0.095941 -0.966 0.333885 + +Likelihood ratio test=86.39 on 5 df, p=< 2.2e-16 +n= 9433, number of events= 449 + (342 observations deleted due to missingness)+++Wald df p +1 0.9337712 1 0.3338849+test.new.var(old.vars, "year", 5)+++Call: +coxph(formula = new.surv, data = dat, method = "breslow") + + coef exp(coef) se(coef) z p +tx.typeLiving 0.397903 1.488700 0.121903 3.264 0.00110 +age.rec -0.043831 0.957116 0.008875 -4.939 7.87e-07 +hla.match -0.115648 0.890789 0.040537 -2.853 0.00433 +age.donor -0.008325 0.991709 0.003854 -2.160 0.03078 +year -0.033109 0.967433 0.015844 -2.090 0.03664 + +Likelihood ratio test=89.84 on 5 df, p=< 2.2e-16 +n= 9433, number of events= 449 + (342 observations deleted due to missingness)+++Wald df p +1 4.367128 1 0.0366387+
yearhas the smallest \(p\)-value+ +12.6
+tx.type+age.rec+hla.match+age.donor+year+++old.vars <- c("tx.type", "age.rec", "hla.match", "age.donor", "year") + +test.new.var(old.vars, "cold.isc", 6)+++Call: +coxph(formula = new.surv, data = dat, method = "breslow") + + coef exp(coef) se(coef) z p +tx.typeLiving 0.381855 1.464999 0.189901 2.011 0.0443 +age.rec -0.021369 0.978857 0.010295 -2.076 0.0379 +hla.match -0.073960 0.928708 0.044568 -1.660 0.0970 +age.donor -0.002981 0.997024 0.004226 -0.705 0.4806 +year -0.037375 0.963315 0.019227 -1.944 0.0519 +cold.isc 0.002743 1.002747 0.006825 0.402 0.6878 + +Likelihood ratio test=37.86 on 6 df, p=1.195e-06 +n= 7347, number of events= 351 + (2428 observations deleted due to missingness)+++Wald df p +1 0.1615171 1 0.6877636+test.new.var(old.vars, "sex", 6)+++Call: +coxph(formula = new.surv, data = dat, method = "breslow") + + coef exp(coef) se(coef) z p +tx.typeLiving 0.401829 1.494556 0.121963 3.295 0.000985 +age.rec -0.044602 0.956378 0.008916 -5.003 5.66e-07 +hla.match -0.114750 0.891589 0.040546 -2.830 0.004653 +age.donor -0.008185 0.991848 0.003857 -2.122 0.033833 +year -0.033222 0.967324 0.015844 -2.097 0.036011 +sexMale -0.094191 0.910109 0.095956 -0.982 0.326293 + +Likelihood ratio test=90.8 on 6 df, p=< 2.2e-16 +n= 9433, number of events= 449 + (342 observations deleted due to missingness)+++Wald df p +1 0.9635531 1 0.3262933Neither has \(p\)-value less than 0.05. Stop.
++ +13 Full Model
++++cox.full+++Call: +coxph(formula = surv.full, data = dat, method = "breslow") + + coef exp(coef) se(coef) z p +tx.typeLiving 0.0734859 1.0762534 0.0253724 2.896 0.00378 +age.rec 0.0154431 1.0155630 0.0020497 7.534 4.9e-14 +hla.match -0.0210826 0.9791381 0.0086077 -2.449 0.01431 +age.donor -0.0009874 0.9990131 0.0008841 -1.117 0.26405 +year 0.4429003 1.5572171 0.0057586 76.910 < 2e-16 + +Likelihood ratio test=8440 on 5 df, p=< 2.2e-16 +n= 9433, number of events= 9433 + (342 observations deleted due to missingness)+++++++ ++
+ +13.1 What happened to
+year?+++++++ ++
+++++++ ++
Probabily want to omit it
++ +13.2 Without
+year+++cox.full.1+++Call: +coxph(formula = surv.full.1, data = dat, method = "breslow") + + coef exp(coef) se(coef) z p +tx.typeLiving 0.0037484 1.0037554 0.0254736 0.147 0.8830 +age.rec 0.0139177 1.0140150 0.0020498 6.790 1.12e-11 +hla.match -0.0854685 0.9180821 0.0089546 -9.545 < 2e-16 +age.donor 0.0023071 1.0023098 0.0008657 2.665 0.0077 + +Likelihood ratio test=172.9 on 4 df, p=< 2.2e-16 +n= 9433, number of events= 9433 + (342 observations deleted due to missingness)+++++++ ++
+TODO: write a more generic function (UPDATE: FAILED)
+++
++ 14 Old
+++ +14.1 Death & Censored
++++dat |> + group_by(death) |> + summarise( + n = n(), + percentage = n() / nrow(dat) * 100 + )+++# A tibble: 2 × 3 + death n percentage + <int> <int> <dbl> +1 0 9310 95.2 +2 1 465 4.76Most of the patients (95.24%) were censored.
++++++++ ++
+++++++ ++
+++++hla.match age.donor age.rec cold.isc death year sex tx.type follow.up +1 2 25 15 6 0 2002 Female Living 0 +2 1 42 10 11 1 1999 Female Living 0 +3 1 9 14 8 0 2002 Male Living 0 +4 0 35 12 26 0 2002 Male Living 0 +5 2 17 12 28 1 1997 Male Living 0 +6 3 44 13 1 0 2002 Male Cadaveric 0 +7 1 34 12 1 0 2002 Male Cadaveric 0 +8 4 38 11 2 0 2002 Female Cadaveric 0 +9 0 23 10 6 0 2002 Female Living 0 +10 3 26 1 1 0 2002 Male Cadaveric 0 +11 NA 42 18 NA 0 2002 Male Cadaveric 0 +12 4 39 9 NA 0 2002 Female Cadaveric 0 +13 3 34 13 NA 0 2002 Male Cadaveric 0 +14 3 NA 14 2 0 2002 Female Cadaveric 0 +15 3 40 17 1 0 2002 Female Cadaveric 0 +16 3 22 14 2 0 2002 Female Cadaveric 0 +17 0 17 17 12 0 2002 Male Living 0 +18 3 NA 7 NA 0 2002 Male Cadaveric 0 +19 0 4 0 NA 1 2001 Female Living 0 +20 4 NA NA 1 0 2002 Female Cadaveric 0 +21 3 42 18 1 0 2002 Male Cadaveric 0 +22 1 19 15 21 1 1994 Male Living 0 +23 3 44 10 NA 0 2002 Male Cadaveric 0 +24 0 13 14 NA 0 2001 Male Living 0 +25 1 NA 6 1 0 2002 Male Cadaveric 0+++++hla.match age.donor age.rec cold.isc death year sex tx.type follow.up +1 2 25 15 6 0 2002 Female Living 0 +2 1 42 10 11 1 1999 Female Living 0 +3 1 9 14 8 0 2002 Male Living 0 +4 0 35 12 26 0 2002 Male Living 0 +5 2 17 12 28 1 1997 Male Living 0 +6 3 44 13 1 0 2002 Male Cadaveric 0+ +14.2 Cox Model - Age as a Continuous Variable
++++cox.age.rec <- coxph(Surv(follow.up, death) ~ age.rec, data = dat) +cox.age.rec |> summary()+++Call: +coxph(formula = Surv(follow.up, death) ~ age.rec, data = dat) + + n= 9766, number of events= 464 + (9 observations deleted due to missingness) + + coef exp(coef) se(coef) z Pr(>|z|) +age.rec -0.042550 0.958342 0.008434 -5.045 4.53e-07 *** +--- +Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 + + exp(coef) exp(-coef) lower .95 upper .95 +age.rec 0.9583 1.043 0.9426 0.9743 + +Concordance= 0.586 (se = 0.016 ) +Likelihood ratio test= 24.81 on 1 df, p=6e-07 +Wald test = 25.45 on 1 df, p=5e-07 +Score (logrank) test = 25.75 on 1 df, p=4e-07+ +14.3 Cox Model - Age as a Categorical Variable
++++++chisq df p +tx.type 2.46 1 0.11654 +age.rec 36.09 1 1.9e-09 +hla.match 8.63 1 0.00331 +age.donor 10.88 1 0.00097 +year 3.96 1 0.04666 +GLOBAL 52.00 5 5.4e-10+++++ ++
+++++ ++
+++++ ++
+++++ ++
+++++ ++
+++++ ++
+ +14.4 Table 1 (L)
++ +14.4.1 HLA match
+hla.match++++++ 0 1 2 3 4 5 6 <NA> + 784 1504 1553 3778 1278 365 279 234+++++++ ++
+++++++ ++
+++ +++Warning ++++TODO: change to percentage within each
+tx.type+++ +++Note ++++Living donors have less
+hla.matchthen cadaveric donors+ +14.4.2 Donor age
+age.donor+++mean(dat$age.donor, na.rm = TRUE)+++[1] 31.29952+median(dat$age.donor, na.rm = TRUE)+++[1] 33+++++++ ++
+++++++ ++
+++++++ ++
+++++++ ++
+++++++ ++
+++ +++Note ++++There is age difference between different
+tx.type+ +14.4.3 Recipient Age
+age.rec+++mean(dat$age.rec, na.rm = TRUE)+++[1] 11.64653+median(dat$age.rec, na.rm = TRUE)+++[1] 13+++++++ ++
+++++++ ++
+++++++ ++
+++++++ ++
+++++++ ++
+ +14.4.4 Cold Ischemia Time
+cold.isc+++mean(dat$cold.isc, na.rm = TRUE)+++[1] 10.85967+median(dat$cold.isc, na.rm = TRUE)+++[1] 7+++++++ ++
+++++++ ++
+++++++ ++
+ +14.4.5 Transplant Type
+tx.type++++++Cadaveric Living <NA> + 5148 4627 0+++dat$tx.type |> table()++++Cadaveric Living + 5148 4627+++++++ ++
+ +14.4.6 Year
+year++++++1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 + 728 734 666 736 787 813 789 745 740 842 706 834 655+++++++ ++
+++++++ ++
+++++++ ++
+++++++ ++
+++++++ ++
+ +14.5 Kaplan-Meier
++ +14.5.1 Overall (L)
++++++Call: survfit(formula = Surv(follow.up, death) ~ 1, data = dat) + + time n.risk n.event survival std.err lower 95% CI upper 95% CI + 4 4150 359 0.953 0.00252 0.948 0.958 + 8 1197 86 0.919 0.00449 0.910 0.928 + 12 48 20 0.888 0.00867 0.871 0.905+++++++ ++
+ +14.5.2
+tx.type(M)+++++++ ++
+ +14.6 Cox Model
++ +14.6.1
+deathDistribution (?)+++++++ ++
+ +14.6.2 Overall (L)
++++ +++Note ++++skip overall, jump directly to
+tx.type+++++# A tibble: 7 × 5 + fu.interval n.censored n.event n.at.risk hazard.rate + <dbl> <int> <int> <dbl> <dbl> +1 0.333 830 152 9775 0.0155 +2 0.667 491 37 8793 0.00421 +3 1 613 27 8265 0.00327 +4 2 1164 62 7625 0.00813 +5 3 1143 41 6399 0.00641 +6 4 1053 40 5215 0.00767 +7 5 919 31 4122 0.00752+ +14.6.3
+tx.type(M)+ +14.6.4
+age(continuous) (L)+ +14.6.5
+age(categorical) (M)+ +14.6.6 Full model (L)
++++++Call: +coxph(formula = Surv(follow.up, death) ~ hla.match + tx.type, + data = dat) + + n= 9541, number of events= 451 + (234 observations deleted due to missingness) + + coef exp(coef) se(coef) z Pr(>|z|) +hla.match -0.12460 0.88285 0.03959 -3.148 0.00165 ** +tx.typeLiving 0.45336 1.57359 0.11146 4.068 4.75e-05 *** +--- +Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 + + exp(coef) exp(-coef) lower .95 upper .95 +hla.match 0.8828 1.1327 0.8169 0.9541 +tx.typeLiving 1.5736 0.6355 1.2648 1.9578 + +Concordance= 0.611 (se = 0.014 ) +Likelihood ratio test= 52.43 on 2 df, p=4e-12 +Wald test = 51.54 on 2 df, p=6e-12 +Score (logrank) test = 53.06 on 2 df, p=3e-12+++Call: +coxph(formula = Surv(follow.up, death) ~ hla.match * tx.type, + data = dat) + + n= 9541, number of events= 451 + (234 observations deleted due to missingness) + + coef exp(coef) se(coef) z Pr(>|z|) +hla.match -0.08577 0.91780 0.07828 -1.096 0.2732 +tx.typeLiving 0.59989 1.82192 0.27919 2.149 0.0317 * +hla.match:tx.typeLiving -0.05232 0.94902 0.09086 -0.576 0.5647 +--- +Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 + + exp(coef) exp(-coef) lower .95 upper .95 +hla.match 0.9178 1.0896 0.7873 1.070 +tx.typeLiving 1.8219 0.5489 1.0541 3.149 +hla.match:tx.typeLiving 0.9490 1.0537 0.7942 1.134 + +Concordance= 0.611 (se = 0.014 ) +Likelihood ratio test= 52.77 on 3 df, p=2e-11 +Wald test = 52.64 on 3 df, p=2e-11 +Score (logrank) test = 54.76 on 3 df, p=8e-12+++Analysis of Deviance Table + Cox model: response is Surv(follow.up, death) + Model 1: ~ hla.match + tx.type + Model 2: ~ hla.match * tx.type + loglik Chisq Df Pr(>|Chi|) +1 -3850.6 +2 -3850.5 0.3327 1 0.5641+ +14.6.7 Discussion: include
+yearor not?+ 14.7 Assumption Testing
+## Why Survival Analysis (L)
+Motivation: study the distribution of time to event \(T\).
+Example: time of death after kidney transplant.
++++ @@ -633,7 +3900,7 @@ window.Quarto = {@@ -643,23 +3910,23 @@ window.Quarto = {- +
With this, all we need is to fit a linear model
t ~ Xorlog(t) ~ X-- @@ -1141,7 +4408,7 @@ window.Quarto = {@@ -1151,23 +4418,23 @@ window.Quarto = {- +
Scenario 1: the study ends at the year 2008?
-- @@ -1649,14 +4916,14 @@ window.Quarto = {- +
@@ -1666,23 +4933,23 @@ window.Quarto = {- +
Scenario 2: respondent 3 move away; loss follow up
-- @@ -2164,14 +5431,14 @@ window.Quarto = {-- +
@@ -2182,14 +5449,14 @@ window.Quarto = {- +
- +
- - -1.4 Challenges in Survival Analysis (L)
++ 14.8 Challenges in Survival Analysis (L)
+
- Right Censoring: only observe the event if it occurs before a certain time.
- Left Censoring: event has occurred prior to the start of a research @@ -2208,616 +5475,9 @@ window.Quarto = {
- Patients not dead before 2000 are not included in the dataset.
- -2 Method & Result
-- -2.1 Explain Dataset (M)
---- ---Things to highlight in figures -----
-- -
sex and recipient age are similar across
tx.type- -
Living donors have less
hla.matchthen cadaveric donors- -
There is age difference between different
tx.type- -2.1.1
-sex~tx.tpye--- ---Note -----
sexare similar acrosstx.type------- --
- -2.2 Table 1 (L)
-- -2.2.1 HLA match
-hla.match------ 0 1 2 3 4 5 6 <NA> - 784 1504 1553 3778 1278 365 279 234------- --
------- --
--- ---Warning ----TODO: change to percentage within each
-tx.type--- ---Note ----Living donors have less
-hla.matchthen cadaveric donors- -2.2.2 Donor age
-age.donor---mean(dat$age.donor, na.rm = TRUE)---[1] 31.29952-median(dat$age.donor, na.rm = TRUE)---[1] 33-----Warning: Removed 113 rows containing non-finite outside the scale range -(`stat_count()`).----- --
-----Warning: Removed 113 rows containing non-finite outside the scale range -(`stat_count()`).----- --
-----Warning: Removed 113 rows containing non-finite outside the scale range -(`stat_density()`).----- --
-----Warning: Removed 113 rows containing non-finite outside the scale range -(`stat_density()`).----- --
-----Warning: Removed 113 rows containing non-finite outside the scale range -(`stat_boxplot()`).----- --
--- ---Note ----There is age difference between different
-tx.type- -2.2.3 Recipient Age
-age.rec---mean(dat$age.rec, na.rm = TRUE)---[1] 11.64653-median(dat$age.rec, na.rm = TRUE)---[1] 13-----Warning: Removed 9 rows containing non-finite outside the scale range -(`stat_count()`).----- --
-----Warning: Removed 113 rows containing non-finite outside the scale range -(`stat_boxplot()`).----- --
-----Warning: Removed 9 rows containing non-finite outside the scale range -(`stat_count()`).----- --
-----Warning: Removed 9 rows containing non-finite outside the scale range -(`stat_density()`).----- --
-----Warning: Removed 2250 rows containing non-finite outside the scale range -(`stat_boxplot()`).----- --
- -2.2.4 Cold Ischemia Time
-cold.isc---mean(dat$cold.isc, na.rm = TRUE)---[1] 10.85967-median(dat$cold.isc, na.rm = TRUE)---[1] 7-----Warning: Removed 2250 rows containing non-finite outside the scale range -(`stat_density()`).----- --
-----Warning: Removed 2250 rows containing non-finite outside the scale range -(`stat_density()`).----- --
-----Warning: Removed 2250 rows containing non-finite outside the scale range -(`stat_density()`).----- --
- -2.2.5 Transplant Type
-tx.type------Cadaveric Living <NA> - 5148 4627 0---dat$tx.type |> table()----Cadaveric Living - 5148 4627------- --
- -2.2.6 Year
-year------1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 - 728 734 666 736 787 813 789 745 740 842 706 834 655------- --
------- --
------- --
-----Warning: Removed 9 rows containing non-finite outside the scale range -(`stat_boxplot()`).----- --
-----Warning: Removed 113 rows containing non-finite outside the scale range -(`stat_boxplot()`).----- --
- -2.3 Kaplan-Meier
-- -2.3.1 Overall (L)
------Call: survfit(formula = Surv(follow.up, death) ~ 1, data = dat) - time n.risk n.event survival std.err lower 95% CI upper 95% CI - 4 4150 359 0.953 0.00252 0.948 0.958 - 8 1197 86 0.919 0.00449 0.910 0.928 - 12 48 20 0.888 0.00867 0.871 0.905------- --
- -2.3.2
-tx.type(M)------- --
- -2.4 Cox Model
-- -2.4.1
-deathDistribution (?)------- --
- -2.4.2 Overall (L)
---- ---Note ----skip overall, jump directly to
-tx.type-----# A tibble: 7 × 5 - fu.interval n.censored n.event n.at.risk hazard.rate - <dbl> <int> <int> <dbl> <dbl> -1 0.333 830 152 9775 0.0155 -2 0.667 491 37 8793 0.00421 -3 1 613 27 8265 0.00327 -4 2 1164 62 7625 0.00813 -5 3 1143 41 6399 0.00641 -6 4 1053 40 5215 0.00767 -7 5 919 31 4122 0.00752- -2.4.3
-tx.type(M)- -2.4.4
-age(continuous) (L)- -2.4.5
-age(categorical) (M)- -2.4.6 Full model (L)
------Call: -coxph(formula = Surv(follow.up, death) ~ hla.match + tx.type, - data = dat) - - n= 9541, number of events= 451 - (234 observations deleted due to missingness) - - coef exp(coef) se(coef) z Pr(>|z|) -hla.match1 -0.08845 0.91535 0.17308 -0.511 0.60933 -hla.match2 -0.36340 0.69531 0.17919 -2.028 0.04256 * -hla.match3 -0.50810 0.60164 0.18298 -2.777 0.00549 ** -hla.match4 -0.65410 0.51991 0.22221 -2.944 0.00324 ** -hla.match5 -0.39445 0.67405 0.29410 -1.341 0.17985 -hla.match6 -0.51491 0.59755 0.31489 -1.635 0.10201 -tx.typeLiving 0.39049 1.47771 0.12688 3.078 0.00209 ** ---- -Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 - - exp(coef) exp(-coef) lower .95 upper .95 -hla.match1 0.9153 1.0925 0.6520 1.2850 -hla.match2 0.6953 1.4382 0.4894 0.9879 -hla.match3 0.6016 1.6621 0.4203 0.8612 -hla.match4 0.5199 1.9234 0.3363 0.8037 -hla.match5 0.6740 1.4836 0.3787 1.1996 -hla.match6 0.5976 1.6735 0.3224 1.1077 -tx.typeLiving 1.4777 0.6767 1.1524 1.8949 - -Concordance= 0.613 (se = 0.014 ) -Likelihood ratio test= 56.79 on 7 df, p=7e-10 -Wald test = 57.08 on 7 df, p=6e-10 -Score (logrank) test = 59.51 on 7 df, p=2e-10---Call: -coxph(formula = Surv(follow.up, death) ~ hla.match * tx.type, - data = dat) - - n= 9541, number of events= 451 - (234 observations deleted due to missingness) - - coef exp(coef) se(coef) z Pr(>|z|) -hla.match1 0.8094 2.2467 0.8165 0.991 0.3215 -hla.match2 1.1371 3.1176 0.7530 1.510 0.1311 -hla.match3 0.4217 1.5245 0.7140 0.591 0.5548 -hla.match4 0.1094 1.1156 0.7341 0.149 0.8816 -hla.match5 0.4754 1.6087 0.7820 0.608 0.5432 -hla.match6 0.5836 1.7925 0.8021 0.728 0.4669 -tx.typeLiving 1.3801 3.9752 0.7218 1.912 0.0559 . -hla.match1:tx.typeLiving -0.9614 0.3823 0.8355 -1.151 0.2498 -hla.match2:tx.typeLiving -1.6578 0.1905 0.7762 -2.136 0.0327 * -hla.match3:tx.typeLiving -1.0096 0.3644 0.7461 -1.353 0.1760 -hla.match4:tx.typeLiving -0.5175 0.5960 0.7863 -0.658 0.5105 -hla.match5:tx.typeLiving -0.8601 0.4231 0.8805 -0.977 0.3286 -hla.match6:tx.typeLiving -1.3342 0.2634 0.9113 -1.464 0.1432 ---- -Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 - - exp(coef) exp(-coef) lower .95 upper .95 -hla.match1 2.2467 0.4451 0.45341 11.1322 -hla.match2 3.1176 0.3208 0.71257 13.6400 -hla.match3 1.5245 0.6559 0.37617 6.1787 -hla.match4 1.1156 0.8964 0.26460 4.7032 -hla.match5 1.6087 0.6216 0.34737 7.4498 -hla.match6 1.7925 0.5579 0.37212 8.6342 -tx.typeLiving 3.9752 0.2516 0.96600 16.3580 -hla.match1:tx.typeLiving 0.3823 2.6155 0.07435 1.9662 -hla.match2:tx.typeLiving 0.1905 5.2480 0.04162 0.8723 -hla.match3:tx.typeLiving 0.3644 2.7444 0.08443 1.5726 -hla.match4:tx.typeLiving 0.5960 1.6778 0.12763 2.7836 -hla.match5:tx.typeLiving 0.4231 2.3635 0.07533 2.3763 -hla.match6:tx.typeLiving 0.2634 3.7971 0.04414 1.5712 - -Concordance= 0.615 (se = 0.014 ) -Likelihood ratio test= 66.78 on 13 df, p=3e-09 -Wald test = 65.9 on 13 df, p=5e-09 -Score (logrank) test = 69.92 on 13 df, p=8e-10---Analysis of Deviance Table - Cox model: response is Surv(follow.up, death) - Model 1: ~ hla.match + tx.type - Model 2: ~ hla.match * tx.type - loglik Chisq Df Pr(>|Chi|) -1 -3848.5 -2 -3843.5 9.9891 6 0.1251- -2.4.7 Discussion: include
-yearor not?- -2.5 Assumption Testing
-- 3 Conclusion (M + L)
-






































































































