You are on page 1of 28

Introduction to Survival Analysis using R

Setia Pramana
July 2, 2014

1 One Group Kaplan Meier Curve


> ## 1 Group Kaplan Meier Curve ##
> ## Loading Data ##
> hmohiv<-read.table("http://www.ats.ucla.edu/stat/r/examples/asa/hmohiv.csv", sep=",", head
> # select specific ID
> attach(hmohiv)
> mini <-hmohiv[ID<=10,1:5]
> mini

ID time age drug censor


1 1 5 46 0 1
2 2 6 35 1 0
3 3 8 30 1 1
4 4 3 30 1 1
5 5 22 36 0 1
6 6 1 32 1 0
7 7 7 36 1 1
8 8 9 31 1 1
9 9 3 48 0 1
10 10 12 47 0 1

> library(survival)
> mini.surv <- survfit(Surv(time, censor)~ drug, conf.type="none", data=mini)
> summary(mini.surv)

Call: survfit(formula = Surv(time, censor) ~ drug, data = mini, conf.type = "none")

drug=0
time n.risk n.event survival std.err
3 4 1 0.75 0.217
5 3 1 0.50 0.250
12 2 1 0.25 0.217
22 1 1 0.00 NaN

1
drug=1
time n.risk n.event survival std.err
3 5 1 0.800 0.179
7 3 1 0.533 0.248
8 2 1 0.267 0.226
9 1 1 0.000 NaN

> mini.surv

Call: survfit(formula = Surv(time, censor) ~ drug, data = mini, conf.type = "none")

records n.max n.start events median


drug=0 4 4 4 4 8.5
drug=1 6 6 6 4 8.0

> plot(mini.surv, xlab="Time", ylab="Survival Probability", lty=1:2, col=c(1,2))


> legend("topright",c( "Control","treatment"),lty=1:2, col=1:2,lwd=2)
> survdiff(Surv(time, censor) ~ drug, data=mini)

Call:
survdiff(formula = Surv(time, censor) ~ drug, data = mini)

N Observed Expected (O-E)^2/E (O-E)^2/V


drug=0 4 4 4.88 0.160 0.563
drug=1 6 4 3.12 0.251 0.563

Chisq= 0.6 on 1 degrees of freedom, p= 0.453

>

2
1.0
Control
treatment
0.8
Survival Probability

0.6
0.4
0.2
0.0

0 5 10 15 20

Time

2 Two Groups Kaplan Meier Curve


> twogroup <-read.table("http://www.ats.ucla.edu/stat/r/examples/asa/minitest.txt", header =
> twogroup

time censor drug


1 3 1 0
2 4 0 0
3 5 1 0
4 22 1 0
5 34 1 0
6 2 1 1
7 3 1 1
8 4 0 1
9 7 1 1
10 11 1 1

> #fitting survival ##


>
> fit2 <- survfit(Surv(time, censor)~ drug, conf.type="none", data=twogroup)
> summary(fit2)

3
Call: survfit(formula = Surv(time, censor) ~ drug, data = twogroup,
conf.type = "none")

drug=0
time n.risk n.event survival std.err
3 5 1 0.800 0.179
5 3 1 0.533 0.248
22 2 1 0.267 0.226
34 1 1 0.000 NaN

drug=1
time n.risk n.event survival std.err
2 5 1 0.8 0.179
3 4 1 0.6 0.219
7 2 1 0.3 0.239
11 1 1 0.0 NaN

> plot(fit2, lty=1:2, col=c(1,2))


> legend("topright",c("Treatment", "Control"),lty=1:2, col=1:2,lwd=2)
1.0

Treatment
Control
0.8
0.6
0.4
0.2
0.0

0 5 10 15 20 25 30 35

4
3 Log-Rank Test
> survdiff(Surv(time, censor) ~ drug, data=twogroup)

Call:
survdiff(formula = Surv(time, censor) ~ drug, data = twogroup)

N Observed Expected (O-E)^2/E (O-E)^2/V


drug=0 5 4 5.38 0.353 1.36
drug=1 5 4 2.62 0.724 1.36

Chisq= 1.4 on 1 degrees of freedom, p= 0.243

>

> ## Leukemia data


>
>
> library(survival)
> timerem <- c(1,1,2,2,3,4,4,5,5,8,8,8,8,11,11,12,12,15,17,22, 23,
+ 6,6,6,6,7,9,10, 10,11,13,16,17,19,20,22,23,25,32,32,34,35)
> censor <- c(rep(1,21), 0,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0)
> group <- rep(c("control","6mer"),each=21)
> length(censor)

[1] 42

> leukdata <- data.frame(time=timerem,censor,group)


> fit2.leuk <- survfit(Surv(time, censor)~ group, data=leukdata)
> summary(fit2.leuk )

Call: survfit(formula = Surv(time, censor) ~ group, data = leukdata)

group=6mer
time n.risk n.event survival std.err lower 95% CI upper 95% CI
6 21 3 0.857 0.0764 0.720 1.000
7 17 1 0.807 0.0869 0.653 0.996
10 15 1 0.753 0.0963 0.586 0.968
13 12 1 0.690 0.1068 0.510 0.935
16 11 1 0.627 0.1141 0.439 0.896
22 7 1 0.538 0.1282 0.337 0.858
23 6 1 0.448 0.1346 0.249 0.807

group=control
time n.risk n.event survival std.err lower 95% CI upper 95% CI
1 21 2 0.9048 0.0641 0.78754 1.000
2 19 2 0.8095 0.0857 0.65785 0.996

5
3 17 1 0.7619 0.0929 0.59988 0.968
4 16 2 0.6667 0.1029 0.49268 0.902
5 14 2 0.5714 0.1080 0.39455 0.828
8 12 4 0.3810 0.1060 0.22085 0.657
11 8 2 0.2857 0.0986 0.14529 0.562
12 6 2 0.1905 0.0857 0.07887 0.460
15 4 1 0.1429 0.0764 0.05011 0.407
17 3 1 0.0952 0.0641 0.02549 0.356
22 2 1 0.0476 0.0465 0.00703 0.322
23 1 1 0.0000 NaN NA NA

> plot(fit2.leuk , col=c(1,2), lwd=2,lty=1:2, xlab="Weeks", ylab="Survival")


> legend("topright",c("Treatment", "Control"),lty=1:2, col=1:2,lwd=2)
> survdiff(Surv(time, censor) ~ group, data=leukdata)

Call:
survdiff(formula = Surv(time, censor) ~ group, data = leukdata)

N Observed Expected (O-E)^2/E (O-E)^2/V


group=6mer 21 9 19.3 5.46 16.8
group=control 21 21 10.7 9.77 16.8

Chisq= 16.8 on 1 degrees of freedom, p= 4.17e-05

>
>

6
1.0
Treatment
Control
0.8
0.6
Survival

0.4
0.2
0.0

0 5 10 15 20 25 30 35

Weeks

4 Cox Proportional Hazards


> #install.packages("KMsurv")
> library(KMsurv)
> data(drug6mp)
> cphfit <- coxph(Surv(time, censor) ~ group, data=leukdata, method="breslow")
> summary(cphfit)
Call:
coxph(formula = Surv(time, censor) ~ group, data = leukdata,
method = "breslow")

n= 42, number of events= 30

coef exp(coef) se(coef) z Pr(>|z|)


groupcontrol 1.5092 4.5231 0.4096 3.685 0.000229 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

exp(coef) exp(-coef) lower .95 upper .95


groupcontrol 4.523 0.2211 2.027 10.09

7
Concordance= 0.69 (se = 0.053 )
Rsquare= 0.304 (max possible= 0.989 )
Likelihood ratio test= 15.21 on 1 df, p=9.615e-05
Wald test = 13.58 on 1 df, p=0.0002288
Score (logrank) test = 15.93 on 1 df, p=6.571e-05

Burn data (variables used below):
T1: Time to excision or on-study time
D1: Excision indicator (1 = yes, 0 = no, i.e. censored)
Z1: Treatment (0 = routine bathing, 1 = body cleansing)
Z2: Gender (0 = male, 1 = female)
Z3: Race (0 = nonwhite, 1 = white)
Z11: Type of burn (1 = chemical, 2 = scald, 3 = electric, 4 = flame)

> data(burn)
> attach(burn)
> head(burn)

Obs Z1 Z2 Z3 Z4 Z5 Z6 Z7 Z8 Z9 Z10 Z11 T1 D1 T2 D2 T3 D3


1 1 0 0 0 15 0 0 1 1 0 0 2 12 0 12 0 12 0
2 2 0 0 1 20 0 0 1 0 0 0 4 9 0 9 0 9 0
3 3 0 0 1 15 0 0 0 1 1 0 2 13 0 13 0 7 1
4 4 0 0 0 20 1 0 1 0 0 0 2 11 1 29 0 29 0
5 5 0 0 1 70 1 1 1 1 0 0 2 28 1 31 0 4 1
6 6 0 0 1 20 1 0 1 0 0 0 4 11 0 11 0 8 1

> surv1 <- Surv(T1, D1)


> fitph <- coxph(surv1 ~ Z1 + as.factor(Z11), method="breslow")
> summary(fitph)

Call:
coxph(formula = surv1 ~ Z1 + as.factor(Z11), method = "breslow")

n= 154, number of events= 99

coef exp(coef) se(coef) z Pr(>|z|)


Z1 0.4969 1.6436 0.2084 2.385 0.0171 *
as.factor(Z11)2 -0.8775 0.4158 0.4980 -1.762 0.0781 .
as.factor(Z11)3 -1.6497 0.1921 0.8025 -2.056 0.0398 *
as.factor(Z11)4 -0.4071 0.6656 0.3955 -1.029 0.3033
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

exp(coef) exp(-coef) lower .95 upper .95


Z1 1.6436 0.6084 1.09252 2.473
as.factor(Z11)2 0.4158 2.4049 0.15668 1.104
as.factor(Z11)3 0.1921 5.2055 0.03985 0.926

8
as.factor(Z11)4 0.6656 1.5025 0.30661 1.445

Concordance= 0.632 (se = 0.033 )


Rsquare= 0.09 (max possible= 0.996 )
Likelihood ratio test= 14.57 on 4 df, p=0.005687
Wald test = 12.77 on 4 df, p=0.01248
Score (logrank) test = 13.52 on 4 df, p=0.009008

> #extract the result


>
> fitph$coefficients # may use coef(fitph) instead

Z1 as.factor(Z11)2 as.factor(Z11)3 as.factor(Z11)4


0.4968908 -0.8774951 -1.6497212 -0.4071141

> fitph$var # I^(-1), estimated cov matrix of the estimates

[,1] [,2] [,3] [,4]


[1,] 4.341949e-02 0.007746416 0.0009624982 6.528035e-05
[2,] 7.746416e-03 0.248005461 0.1437885487 1.442857e-01
[3,] 9.624982e-04 0.143788549 0.6439844004 1.435196e-01
[4,] 6.528035e-05 0.144285704 0.1435196176 1.563852e-01

> fitph$loglik

[1] -423.3526 -416.0686

> fitph2 <- coxph(surv1 ~ Z1 + Z2+ Z3 +as.factor(Z11), method="breslow")


> summary(fitph2)

Call:
coxph(formula = surv1 ~ Z1 + Z2 + Z3 + as.factor(Z11), method = "breslow")

n= 154, number of events= 99

coef exp(coef) se(coef) z Pr(>|z|)


Z1 0.5408 1.7174 0.2109 2.564 0.01034 *
Z2 0.6466 1.9090 0.2365 2.734 0.00625 **
Z3 -0.1273 0.8805 0.3186 -0.399 0.68954
as.factor(Z11)2 -0.8113 0.4443 0.5114 -1.587 0.11259
as.factor(Z11)3 -1.3044 0.2713 0.8162 -1.598 0.11000
as.factor(Z11)4 -0.2102 0.8105 0.4039 -0.520 0.60286
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

exp(coef) exp(-coef) lower .95 upper .95


Z1 1.7174 0.5823 1.13593 2.597

9
Z2 1.9090 0.5238 1.20091 3.035
Z3 0.8805 1.1357 0.47158 1.644
as.factor(Z11)2 0.4443 2.2509 0.16307 1.210
as.factor(Z11)3 0.2713 3.6857 0.05479 1.344
as.factor(Z11)4 0.8105 1.2339 0.36720 1.789

Concordance= 0.648 (se = 0.034 )


Rsquare= 0.131 (max possible= 0.996 )
Likelihood ratio test= 21.6 on 6 df, p=0.001433
Wald test = 20.34 on 6 df, p=0.002408
Score (logrank) test = 21.48 on 6 df, p=0.001503

>

5 Model Diagnostics
5.1 PH Assumption
Recidivism Data (Rossi)

> ### Checking PH assumption ##


> ## Recidivism data (Rossi) ###
> rossi <- read.table("http://cran.r-project.org/doc/contrib/Fox-Companion/Rossi.txt", heade
> fit1 <- coxph(Surv(week, arrest) ~ fin + age + race + wexp + mar
+ + paro + prio, data=rossi)
> fit1

Call:
coxph(formula = Surv(week, arrest) ~ fin + age + race + wexp +
mar + paro + prio, data = rossi)

coef exp(coef) se(coef) z p


fin -0.3794 0.684 0.1914 -1.983 0.0470
age -0.0574 0.944 0.0220 -2.611 0.0090
race 0.3139 1.369 0.3080 1.019 0.3100
wexp -0.1498 0.861 0.2122 -0.706 0.4800
mar -0.4337 0.648 0.3819 -1.136 0.2600
paro -0.0849 0.919 0.1958 -0.434 0.6600
prio 0.0915 1.096 0.0286 3.194 0.0014

Likelihood ratio test=33.3 on 7 df, p=2.36e-05 n= 432, number of events= 114

> svfit <- survfit(Surv(week, arrest)~ wexp, conf.type="none", data=rossi)


> plot(svfit,col=1:2)
> legend("bottomleft",legend= c("not fUll time", "full time"),

10
+ col = 1:4, lty=1:4, title = "work Exp")
>
1.0
0.8
0.6
0.4
0.2

work Exp
not fUll time
0.0

full time

0 10 20 30 40 50

Work Experience

> sumfit <- summary(svfit)


> plot(sumfit$time[sumfit$strata=="wexp=0"],
+ -log(-log(sumfit$surv[sumfit$strata=="wexp=0"])),
+ type="s", xlab="time", ylab="-ln -ln S")
> lines(sumfit$time[sumfit$strata=="wexp=1"],
+ -log(-log(sumfit$ surv[sumfit$strata=="wexp=1"])), type="s",col=2)
> legend("bottomleft",legend= c("not fUll time", "full time"),
+ col = 1:4, lty=1:4, title = "work Exp")

11
5
4
−ln −ln S

3
2

work Exp
not fUll time
1

full time

0 10 20 30 40 50

time

Age
> ## We need to categorize the Age variable ##
> library(car)
> rossi$age.cat <- recode(rossi$age, " lo:19=1; 20:25=2; 26:30=3; 31:hi=4 ")
> table(rossi$age.cat)

1 2 3 4
66 236 66 64

> ## Create Survival Curves (KM)


> svfit2 <- survfit(Surv(week, arrest)~ age.cat, conf.type="none", data=rossi)
> plot(svfit2, col=1:4, lty=1:4, lwd=2)
> legend("bottomleft",legend= c("<19", "20-25","26-30", ">30"),
+ col = 1:4, lty=1:4, title = "age Cat")
>
>

12
1.0
0.8
0.6
0.4

age Cat
0.2

<19
20−25
26−30
0.0

>30

0 10 20 30 40 50

> sumfit2 <- summary(svfit2)


> plot(sumfit2$time[sumfit2$strata=="age.cat=1"],
+ -log(-log(sumfit2$ surv[sumfit2$strata=="age.cat=1"])),
+ type="s", xlab="time", ylab="-ln -ln S",lwd=2)
> lines(sumfit2$time[sumfit2$strata=="age.cat=2"],
+ -log(-log(sumfit2$ surv[sumfit2$strata=="age.cat=2"])), type="s",col=2,lwd=2)
> lines(sumfit2$time[sumfit2$strata=="age.cat=3"],
+ -log(-log(sumfit2$ surv[sumfit2$strata=="age.cat=3"])), type="s",col=3,lwd=2)
> lines(sumfit2$time[sumfit2$strata=="age.cat=4"],
+ -log(-log(sumfit2$ surv[sumfit2$strata=="age.cat=4"])), type="s",col=4,lwd=2)
> legend("bottomleft",legend= c("<19", "20-25","26-30", ">30"),
+ col = 1:4, lty=1:4, title = "age Cat")

13
4
3
−ln −ln S

age Cat
<19
1

20−25
26−30
>30

10 20 30 40 50

time

5.2 GOF test


> fit1 <- coxph(Surv(week, arrest) ~ fin + age + prio, data=rossi,,method="breslow")
> fit1

Call:
coxph(formula = Surv(week, arrest) ~ fin + age + prio, data = rossi,
method = "breslow")

coef exp(coef) se(coef) z p


fin -0.3464 0.707 0.1902 -1.82 0.06900
age -0.0669 0.935 0.0208 -3.21 0.00130
prio 0.0965 1.101 0.0272 3.54 0.00039

Likelihood ratio test=28.9 on 3 df, p=2.35e-06 n= 432, number of events= 114

> asphrs <- cox.zph(fit1)


> asphrs

rho chisq p
fin -0.00651 0.00497 0.9438

14
age -0.20945 6.50520 0.0108
prio -0.07971 0.76175 0.3828
GLOBAL NA 7.08305 0.0693

> par(mfrow=c(2,2))
> for (i in 1:3) plot(asphrs[i])
>
2

0.8
Beta(t) for age
Beta(t) for fin

0.4
0
−2 −1

0.0
−0.4

7.9 20 32 44 7.9 20 32 44

Time Time
1.0
Beta(t) for prio

0.5
0.0

7.9 20 32 44

Time

5.3 Outlier
> ## Outlier ##
> dfbeta <- residuals(fit1, type="dfbeta")
> par(mfrow=c(2,2))
> for (j in 1:3) {
+ plot(dfbeta[,j], ylab=names(coef(fit1))[j])
+ abline(h=0, lty=2)
+ }
>

15
0.02

0.004 0.008
age
0.00
fin

−0.002
−0.02

0 100 200 300 400 0 100 200 300 400

Index Index
0.005
prio

−0.005

0 100 200 300 400

Index

type="dfbeta": the estimated change in each regression coefficient when each
observation is deleted in turn. The results show that none of the observations
is terribly influential individually.

5.4 Linearity
> par(mfrow=c(2,2))
> res <- residuals(fit1, type="martingale")
> X <- as.matrix(rossi[,c("age", "prio")]) # matrix of covariates
> par(mfrow=c(2,2))
> for (j in 1:2) { # residual plots
+ plot(X[,j], res, xlab=c("age", "prio")[j], ylab="residuals")
+ abline(h=0, lty=2)
+ lines(lowess(X[,j], res, iter=0))
+ }
> b <- coef(fit1)[c(2,3)] # regression coefficients
> for (j in 1:2) { # partial-residual plots
+ plot(X[,j], b[j]*X[,j] + res, xlab=c("age", "prio")[j],
+ ylab="component+residual")
+ abline(lm(b[j]*X[,j] + res ~ X[,j]), lty=2)
+ lines(lowess(X[,j], b[j]*X[,j] + res, iter=0))
+ }
>

16
1.0

1.0
0.5

0.5
residuals

residuals
0.0

0.0
−1.0

−1.0
20 25 30 35 40 45 0 5 10 15

age prio
component+residual

component+residual

2.0
−1.0

1.0
−2.0

0.0
−3.0

20 25 30 35 40 45 0 5 10 15

age prio

The partial residual plot is a graphical technique showing the relationship


between a given independent variable and the response variable given that other
independent variables are also in the model.

6 Stratified Cox PH
> ## Stratified cox PH by age category ##
>
> ## assume no interaction ##
> str.fit1 <- coxph(Surv(week, arrest) ~ fin + prio + strata(age.cat), data=rossi
+ ,method="breslow")
> str.fit1

Call:
coxph(formula = Surv(week, arrest) ~ fin + prio + strata(age.cat),
data = rossi, method = "breslow")

coef exp(coef) se(coef) z p


fin -0.342 0.711 0.190 -1.80 0.0730
prio 0.094 1.099 0.027 3.48 0.0005

17
Likelihood ratio test=13.4 on 2 df, p=0.0012 n= 432, number of events= 114

> str.fit1$loglik[2]

[1] -522.7693

> ## PH Assumption test ##


>
> cox.zph(str.fit1)

rho chisq p
fin -0.0191 0.0423 0.837
prio -0.0753 0.6491 0.420
GLOBAL NA 0.6956 0.706

> str.fit1a <- coxph(Surv(week, arrest) ~ fin + strata(age.cat), data=rossi


+ ,method="breslow")
> str.fit1a

Call:
coxph(formula = Surv(week, arrest) ~ fin + strata(age.cat), data = rossi,
method = "breslow")

coef exp(coef) se(coef) z p


fin -0.337 0.714 0.19 -1.78 0.076

Likelihood ratio test=3.19 on 1 df, p=0.0739 n= 432, number of events= 114

> str.fit1a$loglik[2]

[1] -527.8945

> ## Likelihood ratio test ##


> pchisq(-2*str.fit1a$loglik[2] - (-2*str.fit1$loglik[2]),1, lower.tail=F )

[1] 0.001366571

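> ## Wald test ##
> ## NOTE: the Wald statistic is (coef / se)^2. The command below divides the
> ## log-likelihood (loglik[2]) by the standard error instead of the coefficient
> ## (coef(str.fit1)[2]), so the printed p-value of 0 is an artifact. The Wald
> ## p-value for prio is about 0.0005, as reported in the str.fit1 table above.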


> pchisq( (str.fit1$loglik[2]/sqrt(diag(str.fit1$var))[2])^2,1,lower.tail=F )

[1] 0

>
>

With Interaction Assumption

18
> # Test
>
> str.fit2 <- coxph(Surv(week, arrest) ~ fin * strata(age.cat)+ prio * strata(age.cat),
+ data=rossi, method="breslow")
> str.fit2

Call:
coxph(formula = Surv(week, arrest) ~ fin * strata(age.cat) +
prio * strata(age.cat), data = rossi, method = "breslow")

coef exp(coef) se(coef) z p


fin 0.1260 1.1343 0.3543 0.3557 0.7200
prio 0.1595 1.1729 0.0487 3.2718 0.0011
fin:strata(age.cat)age.cat=2 -0.5802 0.5598 0.4485 -1.2937 0.2000
fin:strata(age.cat)age.cat=3 -0.0217 0.9786 0.6060 -0.0357 0.9700
fin:strata(age.cat)age.cat=4 -2.4001 0.0907 1.1338 -2.1168 0.0340
strata(age.cat)age.cat=2:prio -0.0932 0.9110 0.0641 -1.4544 0.1500
strata(age.cat)age.cat=3:prio -0.0410 0.9598 0.0757 -0.5424 0.5900
strata(age.cat)age.cat=4:prio -0.1732 0.8410 0.1779 -0.9737 0.3300

Likelihood ratio test=24.1 on 8 df, p=0.00223 n= 432, number of events= 114

> str.fit2$loglik[2]

[1] -517.4568

> ## Likelihood ratio test ##


> ## Test for Interaction ##
>
> pchisq(-2*str.fit1$loglik[2] - (-2*str.fit2$loglik[2]), 6, lower.tail=F )

[1] 0.1006833

>
>

The result (p = 0.10) shows no evidence of an interaction between the covariates
and the age strata at the 5% level, so the no-interaction model is adequate.

7 Example: Data Addicts


> addicts <- read.table("http://stat.ethz.ch/education/semesters/ss2011/seminar/addicts.dat"
+ header = TRUE)
> head(addicts)

clinic status time prison dose


1 1 1 428 0 50

19
2 1 1 275 1 55
3 1 1 262 0 55
4 1 1 183 0 30
5 1 1 259 1 65
6 1 1 714 0 55

> ## KM curve for clinic ##


>
> fit.cln <- survfit(Surv(time, status)~ clinic, conf.type="none", data=addicts)
> plot(fit.cln, col=1:2, , lty=1:2)
> legend("bottomleft", c("clinic 1", "clinic 2"), col=1:2, lty=1:2)
> survdiff(Surv(time, status)~ clinic, data=addicts)

Call:
survdiff(formula = Surv(time, status) ~ clinic, data = addicts)

N Observed Expected (O-E)^2/E (O-E)^2/V


clinic=1 161 121 90.1 10.6 27.2
clinic=2 77 29 59.9 15.9 27.2

Chisq= 27.2 on 1 degrees of freedom, p= 1.79e-07

>
1.0
0.8
0.6
0.4
0.2

clinic 1
0.0

clinic 2

0 200 400 600 800 1000

20
> ## - Ln - ln Plot ##
>
> sumfit <- summary(fit.cln)
> str(sumfit)

List of 11
$ n : int [1:2] 161 77
$ time : num [1:141] 7 17 19 29 30 33 35 37 41 44 ...
$ n.risk : num [1:141] 160 159 158 155 154 153 152 151 150 149 ...
$ n.event : num [1:141] 1 1 1 1 1 1 1 1 1 1 ...
$ n.censor: num [1:141] 0 0 0 0 0 0 0 0 0 0 ...
$ surv : num [1:141] 0.994 0.988 0.981 0.975 0.969 ...
$ type : chr "right"
$ strata : Factor w/ 2 levels "clinic=1","clinic=2": 1 1 1 1 1 1 1 1 1 1 ...
$ std.err : num [1:141] 0.00623 0.00878 0.01072 0.01238 0.01383 ...
$ call : language survfit(formula = Surv(time, status) ~ clinic, data = addicts, conf.ty
$ table : num [1:2, 1:5] 161 77 161 77 161 77 121 29 428 NA
..- attr(*, "dimnames")=List of 2
.. ..$ : chr [1:2] "clinic=1" "clinic=2"
.. ..$ : chr [1:5] "records" "n.max" "n.start" "events" ...
- attr(*, "class")= chr "summary.survfit"

> plot(sumfit$time[sumfit$strata=="clinic=1"],
+ -log(-log(sumfit$surv[sumfit$strata=="clinic=1"])),
+ type="s", xlab="time", ylab="-ln -ln S")
> lines(sumfit$time[sumfit$strata=="clinic=2"],
+ -log(-log(sumfit$ surv[sumfit$strata=="clinic=2"])), type="s",col=2)
> legend("bottomleft", c("clinic 1", "clinic 2"), col=1:2, lty=1:2)
>

21
5
4
3
−ln −ln S

2
1
0
−1

clinic 1
clinic 2

0 200 400 600 800

time

> ## KM curve for Prison ##


>
> fit.prs <- survfit(Surv(time, status)~ prison, conf.type="none", data=addicts)
> plot(fit.prs, col=1:2, , lty=1:2)
> legend("bottomleft", c("clinic 1", "clinic 2"), col=1:2, lty=1:2)
> survdiff(Surv(time, status)~ prison, data=addicts)

Call:
survdiff(formula = Surv(time, status) ~ prison, data = addicts)

N Observed Expected (O-E)^2/E (O-E)^2/V


prison=0 126 81 87.2 0.445 1.07
prison=1 112 69 62.8 0.619 1.07

Chisq= 1.1 on 1 degrees of freedom, p= 0.3

22
1.0
0.8
0.6
0.4
0.2

clinic 1
0.0

clinic 2

0 200 400 600 800 1000

> ## - Ln - ln Plot for Prison ##


>
> sumfit <- summary(fit.prs)
> str(sumfit)

List of 11
$ n : int [1:2] 126 112
$ time : num [1:142] 26 30 35 37 41 44 47 49 50 62 ...
$ n.risk : num [1:142] 126 123 122 121 120 118 117 116 115 113 ...
$ n.event : num [1:142] 1 1 1 1 2 1 1 1 1 1 ...
$ n.censor: num [1:142] 0 0 0 0 0 0 0 0 0 0 ...
$ surv : num [1:142] 0.992 0.984 0.976 0.968 0.952 ...
$ type : chr "right"
$ strata : Factor w/ 2 levels "prison=0","prison=1": 1 1 1 1 1 1 1 1 1 1 ...
$ std.err : num [1:142] 0.0079 0.0112 0.0137 0.0158 0.0192 ...
$ call : language survfit(formula = Surv(time, status) ~ prison, data = addicts, conf.ty
$ table : num [1:2, 1:5] 126 112 126 112 126 112 81 69 532 394
..- attr(*, "dimnames")=List of 2
.. ..$ : chr [1:2] "prison=0" "prison=1"
.. ..$ : chr [1:5] "records" "n.max" "n.start" "events" ...
- attr(*, "class")= chr "summary.survfit"

23
> plot(sumfit$time[sumfit$strata=="prison=0"],
+ -log(-log(sumfit$surv[sumfit$strata=="prison=0"])),
+ type="s", xlab="time", ylab="-ln -ln S")
> lines(sumfit$time[sumfit$strata=="prison=1"],
+ -log(-log(sumfit$ surv[sumfit$strata=="prison=1"])), type="s",col=2)
> legend("bottomleft", c("Prison 0", "Prison 1"), col=1:2, lty=1:2)
>
>
5
4
3
−ln −ln S

2
1
0

Prison 0
Prison 1

0 200 400 600 800

time

> fit.add <- coxph(Surv(time, status) ~ prison + dose + clinic, data=addicts)


> fit.add
Call:
coxph(formula = Surv(time, status) ~ prison + dose + clinic,
data = addicts)

coef exp(coef) se(coef) z p


prison 0.3250 1.384 0.16790 1.94 5.3e-02
dose -0.0351 0.965 0.00641 -5.48 4.2e-08
clinic -0.9936 0.370 0.21175 -4.69 2.7e-06

Likelihood ratio test=63 on 3 df, p=1.31e-13 n= 238, number of events= 150

24
> ## GOF test ##
> asphrs <- cox.zph(fit.add )
> asphrs
rho chisq p
prison -0.0116 0.0204 0.8866
dose 0.0646 0.5660 0.4519
clinic -0.2458 10.0839 0.0015
GLOBAL NA 10.9371 0.0121
> par(mfrow=c(2,2))
> for (i in 1:3) plot(asphrs[i])

0.2
1 2 3
Beta(t) for prison

Beta(t) for dose

0.0
−1

−0.2
−3

43 220 470 740 43 220 470 740

Time Time
4
Beta(t) for clinic

2
0
−4 −2

43 220 470 740

Time

8 Parametric Modeling
> ## Cox PH Model ##
>
> fit1 <- coxph(Surv(week, arrest) ~ fin + age + race + wexp + mar
+ + paro + prio,data=rossi)
> summary(fit1)
Call:
coxph(formula = Surv(week, arrest) ~ fin + age + race + wexp +

25
mar + paro + prio, data = rossi)

n= 432, number of events= 114

coef exp(coef) se(coef) z Pr(>|z|)


fin -0.37942 0.68426 0.19138 -1.983 0.04742 *
age -0.05744 0.94418 0.02200 -2.611 0.00903 **
race 0.31390 1.36875 0.30799 1.019 0.30812
wexp -0.14980 0.86088 0.21222 -0.706 0.48029
mar -0.43370 0.64810 0.38187 -1.136 0.25606
paro -0.08487 0.91863 0.19576 -0.434 0.66461
prio 0.09150 1.09581 0.02865 3.194 0.00140 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

exp(coef) exp(-coef) lower .95 upper .95


fin 0.6843 1.4614 0.4702 0.9957
age 0.9442 1.0591 0.9043 0.9858
race 1.3688 0.7306 0.7484 2.5032
wexp 0.8609 1.1616 0.5679 1.3049
mar 0.6481 1.5430 0.3066 1.3699
paro 0.9186 1.0886 0.6259 1.3482
prio 1.0958 0.9126 1.0360 1.1591

Concordance= 0.64 (se = 0.027 )


Rsquare= 0.074 (max possible= 0.956 )
Likelihood ratio test= 33.27 on 7 df, p=2.362e-05
Wald test = 32.11 on 7 df, p=3.871e-05
Score (logrank) test = 33.53 on 7 df, p=2.11e-05

> ## AFT model ##


>
> ## Weibull ##
>
> fit.weib <- survreg(Surv(week, arrest) ~ fin + age + race + wexp + mar+ paro + prio,data=
> summary(fit.weib)

Call:
survreg(formula = Surv(week, arrest) ~ fin + age + race + wexp +
mar + paro + prio, data = rossi, dist = "weibull")
Value Std. Error z p
(Intercept) 3.9901 0.4191 9.521 1.72e-21
fin 0.2722 0.1380 1.973 4.85e-02
age 0.0407 0.0160 2.544 1.10e-02
race -0.2248 0.2202 -1.021 3.07e-01
wexp 0.1066 0.1515 0.703 4.82e-01

26
mar 0.3113 0.2733 1.139 2.55e-01
paro 0.0588 0.1396 0.421 6.74e-01
prio -0.0658 0.0209 -3.143 1.67e-03
Log(scale) -0.3391 0.0890 -3.809 1.39e-04

Scale= 0.712

Weibull distribution
Loglik(model)= -679.9 Loglik(intercept only)= -696.6
Chisq= 33.42 on 7 degrees of freedom, p= 2.2e-05
Number of Newton-Raphson Iterations: 6
n= 432

> ## Exponential ##
>
> fit.exp <- survreg(Surv(week, arrest) ~ fin + age + race + wexp + mar+ paro + prio,data=r
> summary(fit.exp)

Call:
survreg(formula = Surv(week, arrest) ~ fin + age + race + wexp +
mar + paro + prio, data = rossi, dist = "exponential")
Value Std. Error z p
(Intercept) 4.0507 0.5860 6.912 4.78e-12
fin 0.3663 0.1911 1.916 5.53e-02
age 0.0556 0.0218 2.546 1.09e-02
race -0.3049 0.3079 -0.990 3.22e-01
wexp 0.1467 0.2117 0.693 4.88e-01
mar 0.4270 0.3814 1.120 2.63e-01
paro 0.0826 0.1956 0.423 6.73e-01
prio -0.0857 0.0283 -3.025 2.48e-03

Scale fixed at 1

Exponential distribution
Loglik(model)= -686.4 Loglik(intercept only)= -702
Chisq= 31.22 on 7 degrees of freedom, p= 5.7e-05
Number of Newton-Raphson Iterations: 5
n= 432

> ## Log Logistic ##


>
> fit.llg <- survreg(Surv(week, arrest) ~ fin + age + race + wexp + mar+ paro + prio,data=r
> summary(fit.llg)

Call:
survreg(formula = Surv(week, arrest) ~ fin + age + race + wexp +
mar + paro + prio, data = rossi, dist = "loglogistic")

27
Value Std. Error z p
(Intercept) 3.9183 0.4274 9.17 4.86e-20
fin 0.2889 0.1456 1.98 4.72e-02
age 0.0364 0.0156 2.34 1.95e-02
race -0.2791 0.2297 -1.22 2.24e-01
wexp 0.1784 0.1572 1.14 2.56e-01
mar 0.3473 0.2697 1.29 1.98e-01
paro 0.0508 0.1496 0.34 7.34e-01
prio -0.0692 0.0227 -3.04 2.35e-03
Log(scale) -0.4352 0.0864 -5.04 4.75e-07

Scale= 0.647

Log logistic distribution


Loglik(model)= -679.9 Loglik(intercept only)= -696.7
Chisq= 33.47 on 7 degrees of freedom, p= 2.2e-05
Number of Newton-Raphson Iterations: 4
n= 432

> #compare the model ##


>
> extractAIC(fit.llg)[2]

[1] 1377.877

> extractAIC(fit.exp)[2]

[1] 1388.732

> extractAIC(fit.weib)[2]

[1] 1377.833

>
>

Which model fits best? (A lower AIC indicates a better fit.)

28

You might also like