Chapter 4 Cox Survival Regression

The following code fits the Proportional Cox Hazard model.

# Proportional Hazards Model #
recid.ph <- coxph(Surv(week, arrest) ~ fin + age + wexp + mar +paro + prio, data = recid)
summary(recid.ph)
## Call:
## coxph(formula = Surv(week, arrest) ~ fin + age + wexp + mar + 
##     paro + prio, data = recid)
## 
##   n= 432, number of events= 114 
## 
##          coef exp(coef) se(coef)      z Pr(>|z|)   
## fin  -0.36554   0.69382  0.19090 -1.915  0.05552 . 
## age  -0.05633   0.94523  0.02189 -2.573  0.01007 * 
## wexp -0.15699   0.85471  0.21208 -0.740  0.45916   
## mar  -0.47130   0.62419  0.38027 -1.239  0.21520   
## paro -0.07792   0.92504  0.19530 -0.399  0.68991   
## prio  0.08966   1.09380  0.02871  3.123  0.00179 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##      exp(coef) exp(-coef) lower .95 upper .95
## fin     0.6938     1.4413    0.4773    1.0087
## age     0.9452     1.0579    0.9055    0.9867
## wexp    0.8547     1.1700    0.5640    1.2952
## mar     0.6242     1.6021    0.2962    1.3152
## paro    0.9250     1.0810    0.6308    1.3564
## prio    1.0938     0.9142    1.0340    1.1571
## 
## Concordance= 0.639  (se = 0.027 )
## Likelihood ratio test= 32.14  on 6 df,   p=2e-05
## Wald test            = 30.79  on 6 df,   p=3e-05
## Score (logrank) test = 32.28  on 6 df,   p=1e-05
# Parameter Interpretation #
recid.ph2 <- coxph(Surv(week, arrest) ~ fin + age + prio, data = recid)
summary(recid.ph2)
## Call:
## coxph(formula = Surv(week, arrest) ~ fin + age + prio, data = recid)
## 
##   n= 432, number of events= 114 
## 
##          coef exp(coef) se(coef)      z Pr(>|z|)    
## fin  -0.34695   0.70684  0.19025 -1.824 0.068197 .  
## age  -0.06711   0.93510  0.02085 -3.218 0.001289 ** 
## prio  0.09689   1.10174  0.02725  3.555 0.000378 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##      exp(coef) exp(-coef) lower .95 upper .95
## fin     0.7068     1.4148    0.4868    1.0263
## age     0.9351     1.0694    0.8977    0.9741
## prio    1.1017     0.9077    1.0444    1.1622
## 
## Concordance= 0.63  (se = 0.027 )
## Likelihood ratio test= 29.05  on 3 df,   p=2e-06
## Wald test            = 27.94  on 3 df,   p=4e-06
## Score (logrank) test = 29.03  on 3 df,   p=2e-06
(exp(coef(recid.ph2))-1)*100
##       fin       age      prio 
## -29.31625  -6.49033  10.17427
# Automatic Selection Techniques #
full.model <- coxph(Surv(week, arrest) ~ fin + age + wexp + mar +paro + prio, 
                    data = recid)

empty.model <- coxph(Surv(week, arrest ) ~ 1, data = recid)

step.model <- step(empty.model, 
                      scope = list(lower=formula(empty.model), 
                                   upper=formula(full.model)), 
                      direction = "both")
## Start:  AIC=1350.76
## Surv(week, arrest) ~ 1
## 
##        Df    AIC
## + age   1 1337.5
## + prio  1 1340.8
## + wexp  1 1343.1
## + mar   1 1348.1
## + fin   1 1348.9
## <none>    1350.8
## + paro  1 1352.4
## 
## Step:  AIC=1337.49
## Surv(week, arrest) ~ age
## 
##        Df    AIC
## + prio  1 1329.1
## + wexp  1 1336.4
## + fin   1 1336.5
## <none>    1337.5
## + mar   1 1337.5
## + paro  1 1338.6
## - age   1 1350.8
## 
## Step:  AIC=1329.08
## Surv(week, arrest) ~ age + prio
## 
##        Df    AIC
## + fin   1 1327.7
## + mar   1 1329.0
## <none>    1329.1
## + wexp  1 1330.0
## + paro  1 1330.9
## - prio  1 1337.5
## - age   1 1340.8
## 
## Step:  AIC=1327.71
## Surv(week, arrest) ~ age + prio + fin
## 
##        Df    AIC
## + mar   1 1327.3
## <none>    1327.7
## + wexp  1 1328.6
## - fin   1 1329.1
## + paro  1 1329.4
## - prio  1 1336.5
## - age   1 1338.3
## 
## Step:  AIC=1327.35
## Surv(week, arrest) ~ age + prio + fin + mar
## 
##        Df    AIC
## <none>    1327.3
## - mar   1 1327.7
## + wexp  1 1328.8
## - fin   1 1329.0
## + paro  1 1329.2
## - age   1 1335.4
## - prio  1 1336.2
summary(step.model)
## Call:
## coxph(formula = Surv(week, arrest) ~ age + prio + fin + mar, 
##     data = recid)
## 
##   n= 432, number of events= 114 
## 
##          coef exp(coef) se(coef)      z Pr(>|z|)    
## age  -0.06042   0.94137  0.02085 -2.897  0.00376 ** 
## prio  0.09751   1.10243  0.02722  3.583  0.00034 ***
## fin  -0.36020   0.69753  0.19049 -1.891  0.05864 .  
## mar  -0.53312   0.58677  0.37276 -1.430  0.15266    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##      exp(coef) exp(-coef) lower .95 upper .95
## age     0.9414     1.0623    0.9037    0.9806
## prio    1.1024     0.9071    1.0452    1.1628
## fin     0.6975     1.4336    0.4802    1.0132
## mar     0.5868     1.7042    0.2826    1.2183
## 
## Concordance= 0.633  (se = 0.027 )
## Likelihood ratio test= 31.41  on 4 df,   p=3e-06
## Wald test            = 29.98  on 4 df,   p=5e-06
## Score (logrank) test = 31.25  on 4 df,   p=3e-06
full.model <- coxph(Surv(week, arrest) ~ fin + age  + wexp + mar +paro + prio, 
                    data = recid)

back.model <- step(full.model, direction = "backward")
## Start:  AIC=1330.62
## Surv(week, arrest) ~ fin + age + wexp + mar + paro + prio
## 
##        Df    AIC
## - paro  1 1328.8
## - wexp  1 1329.2
## - mar   1 1330.3
## <none>    1330.6
## - fin   1 1332.3
## - age   1 1336.3
## - prio  1 1337.2
## 
## Step:  AIC=1328.78
## Surv(week, arrest) ~ fin + age + wexp + mar + prio
## 
##        Df    AIC
## - wexp  1 1327.3
## - mar   1 1328.6
## <none>    1328.8
## - fin   1 1330.4
## - age   1 1334.3
## - prio  1 1335.8
## 
## Step:  AIC=1327.35
## Surv(week, arrest) ~ fin + age + mar + prio
## 
##        Df    AIC
## <none>    1327.3
## - mar   1 1327.7
## - fin   1 1329.0
## - age   1 1335.4
## - prio  1 1336.2
summary(back.model)
## Call:
## coxph(formula = Surv(week, arrest) ~ fin + age + mar + prio, 
##     data = recid)
## 
##   n= 432, number of events= 114 
## 
##          coef exp(coef) se(coef)      z Pr(>|z|)    
## fin  -0.36020   0.69753  0.19049 -1.891  0.05864 .  
## age  -0.06042   0.94137  0.02085 -2.897  0.00376 ** 
## mar  -0.53312   0.58677  0.37276 -1.430  0.15266    
## prio  0.09751   1.10243  0.02722  3.583  0.00034 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##      exp(coef) exp(-coef) lower .95 upper .95
## fin     0.6975     1.4336    0.4802    1.0132
## age     0.9414     1.0623    0.9037    0.9806
## mar     0.5868     1.7042    0.2826    1.2183
## prio    1.1024     0.9071    1.0452    1.1628
## 
## Concordance= 0.633  (se = 0.027 )
## Likelihood ratio test= 31.41  on 4 df,   p=3e-06
## Wald test            = 29.98  on 4 df,   p=5e-06
## Score (logrank) test = 31.25  on 4 df,   p=3e-06
full.model <- coxph(Surv(week, arrest) ~ fin + age  + wexp + mar +paro + prio, 
                    data = recid)

empty.model <- coxph(Surv(week, arrest) ~ 1, data = recid)

for.model <- step(empty.model, 
                  scope = list(lower=formula(empty.model), 
                               upper=formula(full.model)), 
                  direction = "forward")
## Start:  AIC=1350.76
## Surv(week, arrest) ~ 1
## 
##        Df    AIC
## + age   1 1337.5
## + prio  1 1340.8
## + wexp  1 1343.1
## + mar   1 1348.1
## + fin   1 1348.9
## <none>    1350.8
## + paro  1 1352.4
## 
## Step:  AIC=1337.49
## Surv(week, arrest) ~ age
## 
##        Df    AIC
## + prio  1 1329.1
## + wexp  1 1336.4
## + fin   1 1336.5
## <none>    1337.5
## + mar   1 1337.5
## + paro  1 1338.6
## 
## Step:  AIC=1329.08
## Surv(week, arrest) ~ age + prio
## 
##        Df    AIC
## + fin   1 1327.7
## + mar   1 1329.0
## <none>    1329.1
## + wexp  1 1330.0
## + paro  1 1330.9
## 
## Step:  AIC=1327.71
## Surv(week, arrest) ~ age + prio + fin
## 
##        Df    AIC
## + mar   1 1327.3
## <none>    1327.7
## + wexp  1 1328.6
## + paro  1 1329.4
## 
## Step:  AIC=1327.35
## Surv(week, arrest) ~ age + prio + fin + mar
## 
##        Df    AIC
## <none>    1327.3
## + wexp  1 1328.8
## + paro  1 1329.2
summary(for.model)
## Call:
## coxph(formula = Surv(week, arrest) ~ age + prio + fin + mar, 
##     data = recid)
## 
##   n= 432, number of events= 114 
## 
##          coef exp(coef) se(coef)      z Pr(>|z|)    
## age  -0.06042   0.94137  0.02085 -2.897  0.00376 ** 
## prio  0.09751   1.10243  0.02722  3.583  0.00034 ***
## fin  -0.36020   0.69753  0.19049 -1.891  0.05864 .  
## mar  -0.53312   0.58677  0.37276 -1.430  0.15266    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##      exp(coef) exp(-coef) lower .95 upper .95
## age     0.9414     1.0623    0.9037    0.9806
## prio    1.1024     0.9071    1.0452    1.1628
## fin     0.6975     1.4336    0.4802    1.0132
## mar     0.5868     1.7042    0.2826    1.2183
## 
## Concordance= 0.633  (se = 0.027 )
## Likelihood ratio test= 31.41  on 4 df,   p=3e-06
## Wald test            = 29.98  on 4 df,   p=5e-06
## Score (logrank) test = 31.25  on 4 df,   p=3e-06
# Estimated Survival Curves #
newdata <- data.frame(fin = c(1, 0), age = 30, wexp = c(1, 0),
                      mar = 0, paro = 0, prio = c(0, 4))

ggsurvplot(survfit(recid.ph, newdata), data = newdata, break.y.by = 0.1,
           palette = c("purple", "black"), ylab = "survival probability",
           xlab = "week", legend.labs = c("1", "2"), legend.title = "subject")
## Warning: `gather_()` was deprecated in tidyr 1.2.0.
## ℹ Please use `gather()` instead.
## ℹ The deprecated feature was likely used in the survminer package.
##   Please report the issue at <https://github.com/kassambara/survminer/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

4.0.1 Python code for Cox regression

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from lifelines import CoxPHFitter

recid = r.recid

recid_ph=recid[['fin','age', 'prio','arrest','week']]
cph = CoxPHFitter()
cph.fit(recid_ph, duration_col='week', event_col='arrest')
## <lifelines.CoxPHFitter: fitted with 432 total observations, 318 right-censored observations>
cph.print_summary()
## <lifelines.CoxPHFitter: fitted with 432 total observations, 318 right-censored observations>
##              duration col = 'week'
##                 event col = 'arrest'
##       baseline estimation = breslow
##    number of observations = 432
## number of events observed = 114
##    partial log-likelihood = -660.86
##          time fit was run = 2024-11-04 15:34:08 UTC
## 
## ---
##            coef exp(coef)  se(coef)  coef lower 95%  coef upper 95% exp(coef) lower 95% exp(coef) upper 95%
## covariate                                                                                                  
## fin       -0.35      0.71      0.19           -0.72            0.03                0.49                1.03
## age       -0.07      0.94      0.02           -0.11           -0.03                0.90                0.97
## prio       0.10      1.10      0.03            0.04            0.15                1.04                1.16
## 
##            cmp to     z      p  -log2(p)
## covariate                               
## fin          0.00 -1.82   0.07      3.87
## age          0.00 -3.22 <0.005      9.60
## prio         0.00  3.56 <0.005     11.37
## ---
## Concordance = 0.63
## Partial AIC = 1327.71
## log-likelihood ratio test = 29.05 on 3 df
## -log2(p) of ll-ratio test = 18.80

cph.plot()

cph.plot_partial_effects_on_outcome(covariates='prio', values=[0, 2, 4, 6, 8, 10], cmap='coolwarm')

4.1 Diagnostics

Assumptions made for the Cox PH model is linearity for the continuous variables and proportional hazards throughout time (i.e. at EACH time point, the proportion between the hazards remains the same!). We can assess both of these through use of residuals.

# Concordance 
recid.ph = coxph(Surv(week, arrest) ~ fin + age + prio, data = recid)
concordance(recid.ph)
## Call:
## concordance.coxph(object = recid.ph)
## 
## n= 432 
## Concordance= 0.6302 se= 0.02729
## concordant discordant     tied.x     tied.y    tied.xy 
##      26736      15651        195        111          0
## Martingale residuals...Linearity

recid.lin <- coxph(Surv(week, arrest) ~  age + prio + fin, data = recid)
survminer::ggcoxfunctional(recid.lin,data=recid)

## Try transformations or can bin
recid1<-recid %>% mutate(agebin = case_when(
  age < 20 ~ 0,
  age < 30 ~ 1,
  age >= 30 ~2))

recid.ph1 = coxph(Surv(week, arrest) ~ fin + factor(agebin) + prio, data = recid1)

# Proportional Hazard Test - Schoenfeld Residuals 
recid.ph.zph <- cox.zph(recid.ph1)
recid.ph.zph
##                chisq df    p
## fin            0.066  1 0.80
## factor(agebin) 3.399  2 0.18
## prio           0.276  1 0.60
## GLOBAL         3.880  4 0.42
ggcoxzph(recid.ph.zph)

To fit time dependent coefficients:

# Time-Dependent Coefficients #
recid.ph.tdc <- coxph(Surv(week, arrest) ~ fin +  wexp + mar + paro + prio+ age + tt(age), data = recid,
                 tt = function(x, time, ...){x*log(time)})
summary(recid.ph.tdc)
## Call:
## coxph(formula = Surv(week, arrest) ~ fin + wexp + mar + paro + 
##     prio + age + tt(age), data = recid, tt = function(x, time, 
##     ...) {
##     x * log(time)
## })
## 
##   n= 432, number of events= 114 
## 
##             coef exp(coef) se(coef)      z Pr(>|z|)   
## fin     -0.36527   0.69401  0.19087 -1.914  0.05566 . 
## wexp    -0.13317   0.87531  0.21247 -0.627  0.53080   
## mar     -0.45279   0.63585  0.38041 -1.190  0.23394   
## paro    -0.08490   0.91860  0.19534 -0.435  0.66382   
## prio     0.09177   1.09611  0.02880  3.186  0.00144 **
## age      0.12174   1.12946  0.06535  1.863  0.06249 . 
## tt(age) -0.05931   0.94242  0.02182 -2.718  0.00658 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##         exp(coef) exp(-coef) lower .95 upper .95
## fin        0.6940     1.4409    0.4774    1.0089
## wexp       0.8753     1.1424    0.5772    1.3275
## mar        0.6358     1.5727    0.3017    1.3402
## paro       0.9186     1.0886    0.6264    1.3471
## prio       1.0961     0.9123    1.0359    1.1598
## age        1.1295     0.8854    0.9937    1.2838
## tt(age)    0.9424     1.0611    0.9030    0.9836
## 
## Concordance= 0.648  (se = 0.027 )
## Likelihood ratio test= 38.72  on 7 df,   p=2e-06
## Wald test            = 35.09  on 7 df,   p=1e-05
## Score (logrank) test = 36.97  on 7 df,   p=5e-06
recid.ph.tdc <- coxph(Surv(week, arrest) ~  fin + prio +  age + tt(age), data = recid,
        tt = function(x, time, ...){x*log(time)})
summary(recid.ph.tdc)
## Call:
## coxph(formula = Surv(week, arrest) ~ fin + prio + age + tt(age), 
##     data = recid, tt = function(x, time, ...) {
##         x * log(time)
##     })
## 
##   n= 432, number of events= 114 
## 
##             coef exp(coef) se(coef)      z Pr(>|z|)    
## fin     -0.34878   0.70555  0.19025 -1.833 0.066762 .  
## prio     0.09837   1.10337  0.02731  3.603 0.000315 ***
## age      0.11991   1.12739  0.06623  1.810 0.070220 .  
## tt(age) -0.06195   0.93993  0.02207 -2.807 0.005005 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##         exp(coef) exp(-coef) lower .95 upper .95
## fin        0.7055     1.4173    0.4859    1.0244
## prio       1.1034     0.9063    1.0459    1.1640
## age        1.1274     0.8870    0.9901    1.2837
## tt(age)    0.9399     1.0639    0.9001    0.9815
## 
## Concordance= 0.635  (se = 0.028 )
## Likelihood ratio test= 36  on 4 df,   p=3e-07
## Wald test            = 32.41  on 4 df,   p=2e-06
## Score (logrank) test = 33.75  on 4 df,   p=8e-07

The following code is for time-varying variables (values of the variable that change over time). Notice that we are using a different data set (needs to be structured differently).

# Time Varying Variables #
recid_long.ph <- coxph(Surv(start, stop, arrested) ~ fin + age + prio + employed, data = recid_long)
summary(recid_long.ph)
## Call:
## coxph(formula = Surv(start, stop, arrested) ~ fin + age + prio + 
##     employed, data = recid_long)
## 
##   n= 19809, number of events= 114 
## 
##              coef exp(coef) se(coef)      z Pr(>|z|)    
## fin      -0.33051   0.71856  0.19012 -1.738  0.08214 .  
## age      -0.04977   0.95145  0.02053 -2.424  0.01537 *  
## prio      0.08364   1.08724  0.02775  3.014  0.00258 ** 
## employed -1.34815   0.25972  0.24928 -5.408 6.37e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##          exp(coef) exp(-coef) lower .95 upper .95
## fin         0.7186     1.3917    0.4950    1.0430
## age         0.9515     1.0510    0.9139    0.9905
## prio        1.0872     0.9198    1.0297    1.1480
## employed    0.2597     3.8503    0.1593    0.4233
## 
## Concordance= 0.703  (se = 0.023 )
## Likelihood ratio test= 66.19  on 4 df,   p=1e-13
## Wald test            = 53.93  on 4 df,   p=5e-11
## Score (logrank) test = 62.05  on 4 df,   p=1e-12
recid_lag.ph <- coxph(Surv(start, stop, arrested) ~ fin + age +  prio + employed, data = recid_lag)
summary(recid_lag.ph)
## Call:
## coxph(formula = Surv(start, stop, arrested) ~ fin + age + prio + 
##     employed, data = recid_lag)
## 
##   n= 19377, number of events= 113 
## 
##              coef exp(coef) se(coef)      z Pr(>|z|)    
## fin      -0.32464   0.72278  0.19077 -1.702 0.088803 .  
## age      -0.05462   0.94685  0.02073 -2.635 0.008409 ** 
## prio      0.09158   1.09590  0.02753  3.326 0.000880 ***
## employed -0.81123   0.44431  0.21648 -3.747 0.000179 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##          exp(coef) exp(-coef) lower .95 upper .95
## fin         0.7228     1.3835    0.4973    1.0505
## age         0.9468     1.0561    0.9092    0.9861
## prio        1.0959     0.9125    1.0383    1.1567
## employed    0.4443     2.2507    0.2907    0.6791
## 
## Concordance= 0.665  (se = 0.027 )
## Likelihood ratio test= 44.5  on 4 df,   p=5e-09
## Wald test            = 40.91  on 4 df,   p=3e-08
## Score (logrank) test = 43.68  on 4 df,   p=7e-09

4.1.1 Python for Diagnostics

For testing proportional hazard assumption:

cph.check_assumptions(recid_ph, p_value_threshold = 0.05)
## The ``p_value_threshold`` is set at 0.05. Even under the null hypothesis of no violations, some
## covariates will be below the threshold by chance. This is compounded when there are many covariates.
## Similarly, when there are lots of observations, even minor deviances from the proportional hazard
## assumption will be flagged.
## 
## With that in mind, it's best to use a combination of statistical tests and visual tests to determine
## the most serious violations. Produce visual plots using ``check_assumptions(..., show_plots=True)``
## and looking for non-constant lines. See link [A] below for a full example.
## 
## <lifelines.StatisticalResult: proportional_hazard_test>
##  null_distribution = chi squared
## degrees_of_freedom = 1
##              model = <lifelines.CoxPHFitter: fitted with 432 total observations, 318 right-censored observations>
##          test_name = proportional_hazard_test
## 
## ---
##            test_statistic    p  -log2(p)
## age  km              6.29 0.01      6.37
##      rank            6.62 0.01      6.63
## fin  km              0.00 1.00      0.01
##      rank            0.00 0.98      0.03
## prio km              0.82 0.37      1.45
##      rank            0.77 0.38      1.40
## 
## 
## 1. Variable 'age' failed the non-proportional test: p-value is 0.0101.
## 
##    Advice 1: the functional form of the variable 'age' might be incorrect. That is, there may be
## non-linear terms missing. The proportional hazard test used is very sensitive to incorrect
## functional forms. See documentation in link [D] below on how to specify a functional form.
## 
##    Advice 2: try binning the variable 'age' using pd.cut, and then specify it in `strata=['age',
## ...]` in the call in `.fit`. See documentation in link [B] below.
## 
##    Advice 3: try adding an interaction term with your time variable. See documentation in link [C]
## below.
## 
## 
## ---
## [A]  https://lifelines.readthedocs.io/en/latest/jupyter_notebooks/Proportional%20hazard%20assumption.html
## [B]  https://lifelines.readthedocs.io/en/latest/jupyter_notebooks/Proportional%20hazard%20assumption.html#Bin-variable-and-stratify-on-it
## [C]  https://lifelines.readthedocs.io/en/latest/jupyter_notebooks/Proportional%20hazard%20assumption.html#Introduce-time-varying-covariates
## [D]  https://lifelines.readthedocs.io/en/latest/jupyter_notebooks/Proportional%20hazard%20assumption.html#Modify-the-functional-form
## [E]  https://lifelines.readthedocs.io/en/latest/jupyter_notebooks/Proportional%20hazard%20assumption.html#Stratification
## 
## []

Python cannot program time-varying coefficients. However, it is capable of doing time-varying variables:

from lifelines import CoxTimeVaryingFitter

recid_1=r.recid_long
recid_long=recid_1[['fin', 'age', 'prio', 'employed','start','stop','subj','arrested']]
ctv = CoxTimeVaryingFitter()
ctv.fit(recid_long, id_col="subj", event_col="arrested", start_col="start", stop_col="stop", show_progress=True)
## Iteration 1: norm_delta = 6.45e-01, step_size = 0.9500, log_lik = -675.38063, newton_decrement = 3.10e+01, seconds_since_start = 0.0
## Iteration 2: norm_delta = 1.68e-01, step_size = 0.9500, log_lik = -643.80170, newton_decrement = 1.44e+00, seconds_since_start = 0.0
## Iteration 3: norm_delta = 2.27e-02, step_size = 0.9500, log_lik = -642.30569, newton_decrement = 2.16e-02, seconds_since_start = 0.0
## Iteration 4: norm_delta = 1.48e-03, step_size = 1.0000, log_lik = -642.28397, newton_decrement = 8.06e-05, seconds_since_start = 0.0
## Iteration 5: norm_delta = 1.14e-06, step_size = 1.0000, log_lik = -642.28389, newton_decrement = 4.71e-11, seconds_since_start = 0.0
## Convergence completed after 5 iterations.
## <lifelines.CoxTimeVaryingFitter: fitted with 19809 periods, 432 subjects, 114 events>
ctv.print_summary()
## <lifelines.CoxTimeVaryingFitter: fitted with 19809 periods, 432 subjects, 114 events>
##          event col = 'arrested'
## number of subjects = 432
##  number of periods = 19809
##   number of events = 114
## partial log-likelihood = -642.28
##   time fit was run = 2024-11-04 15:34:11 UTC
## 
## ---
##            coef exp(coef)  se(coef)  coef lower 95%  coef upper 95% exp(coef) lower 95% exp(coef) upper 95%
## covariate                                                                                                  
## fin       -0.33      0.72      0.19           -0.70            0.04                0.50                1.04
## age       -0.05      0.95      0.02           -0.09           -0.01                0.91                0.99
## prio       0.08      1.09      0.03            0.03            0.14                1.03                1.15
## employed  -1.35      0.26      0.25           -1.84           -0.86                0.16                0.42
## 
##            cmp to     z      p  -log2(p)
## covariate                               
## fin          0.00 -1.74   0.08      3.61
## age          0.00 -2.42   0.02      6.02
## prio         0.00  3.01 <0.005      8.60
## employed     0.00 -5.41 <0.005     23.90
## ---
## Partial AIC = 1292.57
## log-likelihood ratio test = 66.19 on 4 df
## -log2(p) of ll-ratio test = 42.66