Professional Documents
Culture Documents
Setia Pramana
July 2, 2014
> library(survival)
> mini.surv <- survfit(Surv(time, censor)~ drug, conf.type="none", data=mini)
> summary(mini.surv)
drug=0
time n.risk n.event survival std.err
3 4 1 0.75 0.217
5 3 1 0.50 0.250
12 2 1 0.25 0.217
22 1 1 0.00 NaN
1
drug=1
time n.risk n.event survival std.err
3 5 1 0.800 0.179
7 3 1 0.533 0.248
8 2 1 0.267 0.226
9 1 1 0.000 NaN
> mini.surv
Call:
survdiff(formula = Surv(time, censor) ~ drug, data = mini)
>
2
1.0
Control
treatment
0.8
Survival Probability
0.6
0.4
0.2
0.0
0 5 10 15 20
Time
3
Call: survfit(formula = Surv(time, censor) ~ drug, data = twogroup,
conf.type = "none")
drug=0
time n.risk n.event survival std.err
3 5 1 0.800 0.179
5 3 1 0.533 0.248
22 2 1 0.267 0.226
34 1 1 0.000 NaN
drug=1
time n.risk n.event survival std.err
2 5 1 0.8 0.179
3 4 1 0.6 0.219
7 2 1 0.3 0.239
11 1 1 0.0 NaN
Treatment
Control
0.8
0.6
0.4
0.2
0.0
0 5 10 15 20 25 30 35
4
3 Log-Rank Test
> survdiff(Surv(time, censor) ~ drug, data=twogroup)
Call:
survdiff(formula = Surv(time, censor) ~ drug, data = twogroup)
>
[1] 42
group=6mer
time n.risk n.event survival std.err lower 95% CI upper 95% CI
6 21 3 0.857 0.0764 0.720 1.000
7 17 1 0.807 0.0869 0.653 0.996
10 15 1 0.753 0.0963 0.586 0.968
13 12 1 0.690 0.1068 0.510 0.935
16 11 1 0.627 0.1141 0.439 0.896
22 7 1 0.538 0.1282 0.337 0.858
23 6 1 0.448 0.1346 0.249 0.807
group=control
time n.risk n.event survival std.err lower 95% CI upper 95% CI
1 21 2 0.9048 0.0641 0.78754 1.000
2 19 2 0.8095 0.0857 0.65785 0.996
5
3 17 1 0.7619 0.0929 0.59988 0.968
4 16 2 0.6667 0.1029 0.49268 0.902
5 14 2 0.5714 0.1080 0.39455 0.828
8 12 4 0.3810 0.1060 0.22085 0.657
11 8 2 0.2857 0.0986 0.14529 0.562
12 6 2 0.1905 0.0857 0.07887 0.460
15 4 1 0.1429 0.0764 0.05011 0.407
17 3 1 0.0952 0.0641 0.02549 0.356
22 2 1 0.0476 0.0465 0.00703 0.322
23 1 1 0.0000 NaN NA NA
Call:
survdiff(formula = Surv(time, censor) ~ group, data = leukdata)
>
>
6
1.0
Treatment
Control
0.8
0.6
Survival
0.4
0.2
0.0
0 5 10 15 20 25 30 35
Weeks
7
Concordance= 0.69 (se = 0.053 )
Rsquare= 0.304 (max possible= 0.989 )
Likelihood ratio test= 15.21 on 1 df, p=9.615e-05
Wald test = 13.58 on 1 df, p=0.0002288
Score (logrank) test = 15.93 on 1 df, p=6.571e-05
Data Burn:
T1: Time to excision or on-study time
D1: Excision indicator: 1=yes, 0=no (censoring)
Z1: Treatment: 0=routine bathing, 1=body cleansing
Z2: Gender: 0=male, 1=female
Z3: Race: 0=nonwhite, 1=white
Z11: Type of burn: 1=chemical, 2=scald, 3=electric, 4=flame
> data(burn)
> attach(burn)
> head(burn)
Call:
coxph(formula = surv1 ~ Z1 + as.factor(Z11), method = "breslow")
8
as.factor(Z11)4 0.6656 1.5025 0.30661 1.445
> fitph$loglik
Call:
coxph(formula = surv1 ~ Z1 + Z2 + Z3 + as.factor(Z11), method = "breslow")
9
Z2 1.9090 0.5238 1.20091 3.035
Z3 0.8805 1.1357 0.47158 1.644
as.factor(Z11)2 0.4443 2.2509 0.16307 1.210
as.factor(Z11)3 0.2713 3.6857 0.05479 1.344
as.factor(Z11)4 0.8105 1.2339 0.36720 1.789
>
5 Model Diagnostics
5.1 PH Assumption
Data Recidivism
Call:
coxph(formula = Surv(week, arrest) ~ fin + age + race + wexp +
mar + paro + prio, data = rossi)
10
+ col = 1:4, lty=1:4, title = "work Exp")
>
1.0
0.8
0.6
0.4
0.2
work Exp
not fUll time
0.0
full time
0 10 20 30 40 50
Work Experience
11
5
4
−ln −ln S
3
2
work Exp
not fUll time
1
full time
0 10 20 30 40 50
time
Age
> ## We need to categorize the Age variable ##
> library(car)
> rossi$age.cat <- recode(rossi$age, " lo:19=1; 20:25=2; 26:30=3; 31:hi=4 ")
> table(rossi$age.cat)
1 2 3 4
66 236 66 64
12
1.0
0.8
0.6
0.4
age Cat
0.2
<19
20−25
26−30
0.0
>30
0 10 20 30 40 50
13
4
3
−ln −ln S
age Cat
<19
1
20−25
26−30
>30
10 20 30 40 50
time
Call:
coxph(formula = Surv(week, arrest) ~ fin + age + prio, data = rossi,
method = "breslow")
rho chisq p
fin -0.00651 0.00497 0.9438
14
age -0.20945 6.50520 0.0108
prio -0.07971 0.76175 0.3828
GLOBAL NA 7.08305 0.0693
> par(mfrow=c(2,2))
> for (i in 1:3) plot(asphrs[i])
>
2
0.8
Beta(t) for age
Beta(t) for fin
0.4
0
−2 −1
0.0
−0.4
7.9 20 32 44 7.9 20 32 44
Time Time
1.0
Beta(t) for prio
0.5
0.0
7.9 20 32 44
Time
5.3 Outlier
> ## Outlier ##
> dfbeta <- residuals(fit1, type="dfbeta")
> par(mfrow=c(2,2))
> for (j in 1:3) {
+ plot(dfbeta[,j], ylab=names(coef(fit1))[j])
+ abline(h=0, lty=2)
+ }
>
15
0.02
0.004 0.008
age
0.00
fin
−0.002
−0.02
Index Index
0.005
prio
−0.005
Index
5.4 Linearity
> par(mfrow=c(2,2))
> res <- residuals(fit1, type="martingale")
> X <- as.matrix(rossi[,c("age", "prio")]) # matrix of covariates
> par(mfrow=c(2,2))
> for (j in 1:2) { # residual plots
+ plot(X[,j], res, xlab=c("age", "prio")[j], ylab="residuals")
+ abline(h=0, lty=2)
+ lines(lowess(X[,j], res, iter=0))
+ }
> b <- coef(fit1)[c(2,3)] # regression coefficients
> for (j in 1:2) { # partial-residual plots
+ plot(X[,j], b[j]*X[,j] + res, xlab=c("age", "prio")[j],
+ ylab="component+residual")
+ abline(lm(b[j]*X[,j] + res ~ X[,j]), lty=2)
+ lines(lowess(X[,j], b[j]*X[,j] + res, iter=0))
+ }
>
16
1.0
1.0
0.5
0.5
residuals
residuals
0.0
0.0
−1.0
−1.0
20 25 30 35 40 45 0 5 10 15
age prio
component+residual
component+residual
2.0
−1.0
1.0
−2.0
0.0
−3.0
20 25 30 35 40 45 0 5 10 15
age prio
6 Stratified Cox PH
> ## Stratified cox PH by age category ##
>
> ## assume no interaction ##
> str.fit1 <- coxph(Surv(week, arrest) ~ fin + prio + strata(age.cat), data=rossi
+ ,method="breslow")
> str.fit1
Call:
coxph(formula = Surv(week, arrest) ~ fin + prio + strata(age.cat),
data = rossi, method = "breslow")
17
Likelihood ratio test=13.4 on 2 df, p=0.0012 n= 432, number of events= 114
> str.fit1$loglik[2]
[1] -522.7693
rho chisq p
fin -0.0191 0.0423 0.837
prio -0.0753 0.6491 0.420
GLOBAL NA 0.6956 0.706
Call:
coxph(formula = Surv(week, arrest) ~ fin + strata(age.cat), data = rossi,
method = "breslow")
> str.fit1a$loglik[2]
[1] -527.8945
[1] 0.001366571
[1] 0
>
>
18
> # Test
>
> str.fit2 <- coxph(Surv(week, arrest) ~ fin * strata(age.cat)+ prio * strata(age.cat),
+ data=rossi, method="breslow")
> str.fit2
Call:
coxph(formula = Surv(week, arrest) ~ fin * strata(age.cat) +
prio * strata(age.cat), data = rossi, method = "breslow")
> str.fit2$loglik[2]
[1] -517.4568
[1] 0.1006833
>
>
19
2 1 1 275 1 55
3 1 1 262 0 55
4 1 1 183 0 30
5 1 1 259 1 65
6 1 1 714 0 55
Call:
survdiff(formula = Surv(time, status) ~ clinic, data = addicts)
>
1.0
0.8
0.6
0.4
0.2
clinic 1
0.0
clinic 2
20
> ## -ln(-ln S) plot ##
>
> sumfit <- summary(fit.cln)
> str(sumfit)
List of 11
$ n : int [1:2] 161 77
$ time : num [1:141] 7 17 19 29 30 33 35 37 41 44 ...
$ n.risk : num [1:141] 160 159 158 155 154 153 152 151 150 149 ...
$ n.event : num [1:141] 1 1 1 1 1 1 1 1 1 1 ...
$ n.censor: num [1:141] 0 0 0 0 0 0 0 0 0 0 ...
$ surv : num [1:141] 0.994 0.988 0.981 0.975 0.969 ...
$ type : chr "right"
$ strata : Factor w/ 2 levels "clinic=1","clinic=2": 1 1 1 1 1 1 1 1 1 1 ...
$ std.err : num [1:141] 0.00623 0.00878 0.01072 0.01238 0.01383 ...
$ call : language survfit(formula = Surv(time, status) ~ clinic, data = addicts, conf.ty
$ table : num [1:2, 1:5] 161 77 161 77 161 77 121 29 428 NA
..- attr(*, "dimnames")=List of 2
.. ..$ : chr [1:2] "clinic=1" "clinic=2"
.. ..$ : chr [1:5] "records" "n.max" "n.start" "events" ...
- attr(*, "class")= chr "summary.survfit"
> plot(sumfit$time[sumfit$strata=="clinic=1"],
+ -log(-log(sumfit$surv[sumfit$strata=="clinic=1"])),
+ type="s", xlab="time", ylab="-ln -ln S")
> lines(sumfit$time[sumfit$strata=="clinic=2"],
+ -log(-log(sumfit$ surv[sumfit$strata=="clinic=2"])), type="s",col=2)
> legend("bottomleft", c("clinic 1", "clinic 2"), col=1:2, lty=1:2)
>
21
5
4
3
−ln −ln S
2
1
0
−1
clinic 1
clinic 2
time
Call:
survdiff(formula = Surv(time, status) ~ prison, data = addicts)
22
1.0
0.8
0.6
0.4
0.2
clinic 1
0.0
clinic 2
List of 11
$ n : int [1:2] 126 112
$ time : num [1:142] 26 30 35 37 41 44 47 49 50 62 ...
$ n.risk : num [1:142] 126 123 122 121 120 118 117 116 115 113 ...
$ n.event : num [1:142] 1 1 1 1 2 1 1 1 1 1 ...
$ n.censor: num [1:142] 0 0 0 0 0 0 0 0 0 0 ...
$ surv : num [1:142] 0.992 0.984 0.976 0.968 0.952 ...
$ type : chr "right"
$ strata : Factor w/ 2 levels "prison=0","prison=1": 1 1 1 1 1 1 1 1 1 1 ...
$ std.err : num [1:142] 0.0079 0.0112 0.0137 0.0158 0.0192 ...
$ call : language survfit(formula = Surv(time, status) ~ prison, data = addicts, conf.ty
$ table : num [1:2, 1:5] 126 112 126 112 126 112 81 69 532 394
..- attr(*, "dimnames")=List of 2
.. ..$ : chr [1:2] "prison=0" "prison=1"
.. ..$ : chr [1:5] "records" "n.max" "n.start" "events" ...
- attr(*, "class")= chr "summary.survfit"
23
> plot(sumfit$time[sumfit$strata=="prison=0"],
+ -log(-log(sumfit$surv[sumfit$strata=="prison=0"])),
+ type="s", xlab="time", ylab="-ln -ln S")
> lines(sumfit$time[sumfit$strata=="prison=1"],
+ -log(-log(sumfit$ surv[sumfit$strata=="prison=1"])), type="s",col=2)
> legend("bottomleft", c("Prison 0", "Prison 1"), col=1:2, lty=1:2)
>
>
5
4
3
−ln −ln S
2
1
0
Prison 0
Prison 1
time
24
> ## GOF test ##
> asphrs <- cox.zph(fit.add )
> asphrs
rho chisq p
prison -0.0116 0.0204 0.8866
dose 0.0646 0.5660 0.4519
clinic -0.2458 10.0839 0.0015
GLOBAL NA 10.9371 0.0121
> par(mfrow=c(2,2))
> for (i in 1:3) plot(asphrs[i])
0.2
1 2 3
Beta(t) for prison
0.0
−1
−0.2
−3
Time Time
4
Beta(t) for clinic
2
0
−4 −2
Time
8 Parametric Modeling
> ## Cox PH Model ##
>
> fit1 <- coxph(Surv(week, arrest) ~ fin + age + race + wexp + mar
+ + paro + prio,data=rossi)
> summary(fit1)
Call:
coxph(formula = Surv(week, arrest) ~ fin + age + race + wexp +
25
mar + paro + prio, data = rossi)
Call:
survreg(formula = Surv(week, arrest) ~ fin + age + race + wexp +
mar + paro + prio, data = rossi, dist = "weibull")
Value Std. Error z p
(Intercept) 3.9901 0.4191 9.521 1.72e-21
fin 0.2722 0.1380 1.973 4.85e-02
age 0.0407 0.0160 2.544 1.10e-02
race -0.2248 0.2202 -1.021 3.07e-01
wexp 0.1066 0.1515 0.703 4.82e-01
26
mar 0.3113 0.2733 1.139 2.55e-01
paro 0.0588 0.1396 0.421 6.74e-01
prio -0.0658 0.0209 -3.143 1.67e-03
Log(scale) -0.3391 0.0890 -3.809 1.39e-04
Scale= 0.712
Weibull distribution
Loglik(model)= -679.9 Loglik(intercept only)= -696.6
Chisq= 33.42 on 7 degrees of freedom, p= 2.2e-05
Number of Newton-Raphson Iterations: 6
n= 432
> ## Exponential ##
>
> fit.exp <- survreg(Surv(week, arrest) ~ fin + age + race + wexp + mar+ paro + prio,data=r
> summary(fit.exp)
Call:
survreg(formula = Surv(week, arrest) ~ fin + age + race + wexp +
mar + paro + prio, data = rossi, dist = "exponential")
Value Std. Error z p
(Intercept) 4.0507 0.5860 6.912 4.78e-12
fin 0.3663 0.1911 1.916 5.53e-02
age 0.0556 0.0218 2.546 1.09e-02
race -0.3049 0.3079 -0.990 3.22e-01
wexp 0.1467 0.2117 0.693 4.88e-01
mar 0.4270 0.3814 1.120 2.63e-01
paro 0.0826 0.1956 0.423 6.73e-01
prio -0.0857 0.0283 -3.025 2.48e-03
Scale fixed at 1
Exponential distribution
Loglik(model)= -686.4 Loglik(intercept only)= -702
Chisq= 31.22 on 7 degrees of freedom, p= 5.7e-05
Number of Newton-Raphson Iterations: 5
n= 432
Call:
survreg(formula = Surv(week, arrest) ~ fin + age + race + wexp +
mar + paro + prio, data = rossi, dist = "loglogistic")
27
Value Std. Error z p
(Intercept) 3.9183 0.4274 9.17 4.86e-20
fin 0.2889 0.1456 1.98 4.72e-02
age 0.0364 0.0156 2.34 1.95e-02
race -0.2791 0.2297 -1.22 2.24e-01
wexp 0.1784 0.1572 1.14 2.56e-01
mar 0.3473 0.2697 1.29 1.98e-01
paro 0.0508 0.1496 0.34 7.34e-01
prio -0.0692 0.0227 -3.04 2.35e-03
Log(scale) -0.4352 0.0864 -5.04 4.75e-07
Scale= 0.647
[1] 1377.877
> extractAIC(fit.exp)[2]
[1] 1388.732
> extractAIC(fit.weib)[2]
[1] 1377.833
>
>
28