# Overview
over commands to the trial project
# This only
gives the R commands for the trial project.
# It is not
a draft solution of the project.
# Some
comments on how to write a project report was given at the lectures
# Read the
data into R and attach the survival library
cirrhosis=read.table("http://www.uio.no/studier/emner/matnat/math/STK4080/h14/cirrhosis.txt",
header=T)
library(survival)
# Define
acites as a factor:
cirrhosis$asc=factor(cirrhosis$asc)
# Use years
as time scale
cirrhosis$time=cirrhosis$time/365.25
# a)
#� Simple univariate
analyses for one covariate at a time
# Treatment
# Nelson-Aalen plots:
fit.naa=coxph(Surv(time,status)~strata(treat),data=cirrhosis)
surv.naa=survfit(fit.naa)
plot(surv.naa,fun="cumhaz",
mark.time=F ,lty=1:2, xlab="Years since
randomization",ylab="Cumulative hazard",
main="Treatment")
legend("topleft",legend=c("Prednisone","Placebo"),lty=1:2)
# Kaplan-Meier plots:
fit.km=survfit(Surv(time,status)~treat,data=cirrhosis,
conf.type="plain")
plot(fit.km, mark.time=F, lty=1:2,
xlab="Years since randomization",
ylab="Survival",main="Treatment")
legend("topright",legend=c("Prednisone","Placebo"),lty=1:2)
# Estimates of five years survival probabilities with "plain" confidence intervals (alternatively
we could have used the option "log-log")
summary(fit.km,time=5)
# Estimates of median survival time
print(fit.km)
# Alternatively, we may obtain the quartiles by the command:
quantile(fit.km)
# Log-rank test:
survdiff(Surv(time,status)~treat,data=cirrhosis)
# The commands for sex and ascites are similar to the
commands for treatment
# For numeric covariates age and prothrombin index, we have
create a categorical covariate (factor) by a "reasonable" grouping
# For age
cirrhosis$agegroup=cut(cirrhosis$age,breaks=c(0,49,59,69,100),
labels=1:4)
# For prothrombin index (approximately according to
quartiles)
cirrhosis$protgroup=cut(cirrhosis$prot,breaks=c(0,49,69,89,150),
labels=1:4)
# After we have obtained categorical covariates for age and prothrombin
index, the commands are similar to the ones given above for treatment
# b) Univariate Cox regressions
# Cox regression for treatment
cox.treat=coxph(Surv(time,status)~treat,data=cirrhosis)
summary(cox.treat)
# The commands for sex and ascites are similar to the
commands for treatment
# For the
numeric covariates age and prothrombin index, we need to decide how thay should
be coded �
#
(as given on the data file, or suitably transformed, or grouped).
# To see how age should be coded, we fit a model using a
spline for age:
cox.psage=coxph(Surv(time,status)~pspline(age),data=cirrhosis)
print(cox.psage)
termplot(cox.psage,se=T)
# Both the plot (from the templot-command) and the test (from
the print-command) show that it is reasonable to assume a log-linear effect of
age
# We will therefore fit a Cox model using age as a numeric
covariate.
# It may be sensible to report the effect of age per 10
years, so we define a new covariate
# where age is given per 10 years and fit a Cox model with
this covariate
cirrhosis$age10= cirrhosis$age/10
cox.age10=coxph(Surv(time,status)~age10,data=cirrhosis)
summary(cox.age10)
# Then we use a spline fit to see how prothrombin index
should be coded
cox.psprot=coxph(Surv(time,status)~pspline(prot),data=cirrhosis)
print(cox.psprot)
termplot(cox.psprot,se=T)
# The plot indicates that we do not have a log-linear effect
for low ages.
# As there is not easy to find a suitable transformation, we
chose to work with grouped prothrombin index
# (The non-linear part of prothrombin index is not
significant, so could also be acceptable to use prothrombin index as a numeric
covariate)
# When using grouped prothrombin index as a categorical
covariate, it is natural to use group with highest values as reference:
cirrhosis$protgroup=relevel(cirrhosis$protgroup,ref=4)
cox.protgroup=coxph(Surv(time,status)~protgroup,data=cirrhosis)
summary(cox.protgroup)
# c) Multivariate Cox regression
# We then fit a Cox model with all the covariates
cox.all=coxph(Surv(time,status)~treat+sex+asc+age10+protgroup,
data=cirrhosis)
summary(cox.all)
# [In principle it may be the case that the coding of a
numeric covariate that is appropriate for a univariate analysis,
# is not appropriate for a multivariate analysis and vice
versa. But this does not seem to be the case here (commands not shown)]
# We then check for first order interactions between any pair
of two covariates:
# We may e.g. check for interaction between treatment and sex
by the commands:
cox.int=coxph(Surv(time,status)~treat+sex+asc+age10+ protgroup+treat:sex,
data=cirrhosis)
anova(cox.all,cox.int)
# Similar commands may be used to check other first order
interactions
# [Note, however, that for checking interaction between
ascites and prothrombin group,
# we need to merge the two highest prothrombin groups (since
there is only one person with severe ascites in the highest prothrombin group)]
# We find an interaction between treatment and ascites and
between sex and age.
# To ease the interpretation of the interaction between sex
and the numeric covariate age, it is useful to
# center age by subtracting 60 years (which is close to the
mean age)
cirrhosis$cage10=(cirrhosis$age-60)/10
cox.final=coxph(Surv(time,status)~treat+sex+asc+cage10+protgroup+treat:asc+sex:cage10, data=cirrhosis)
summary(cox.final)
# Log-linearity of the numeric covariates has been checked
along the way using splines
# [both for the unvariate and mulivariate Cox models (the
latter commands not given here)]
# We also need to check for proportional hazards:
cox.test=cox.zph(cox.final,transform='log')
print(cox.test)
par(mfrow=c(2,6))
plot(cox.test)
# There are no indication of violation of the assumption of
proportional hazard
#
Make Kaplan-Meier plots for the four groups:
par(mfrow=c(1,1))
plot(survfit(Surv(time,status)~pred.group,data=cirrhosis),mark.time=F,lty=1:4)
#
For illustration we plot the estimated survival function for the following combinations
of the covariates:
#��� 1) prednisone, female, no ascites , age 60,
prothrombin above 90
#��� 2) prednisone, female, slight ascites , age
60, prothrombin 50-69
#��� 3) prednisone, female, slight ascites , age
60, prothrombin below 50
#��� 4) prednisone, female, marked ascites , age
60, prothrombin below 50
#��� 5) placebo, female, no ascites , age 60,
prothrombin above 90
#��� 6) placebo, female, slight ascites , age
60, prothrombin 50-69
#��� 7) placebo, female, slight ascites , age
60, prothrombin below 50
#��� 8) placebo, female, marked ascites , age
60, prothrombin below 50
#
Note that the levels of factors are given in quotes� when defining the new covariate values below
cox.final=coxph(Surv(time,status)~treat+sex+asc+cage10+protgroup +treat:asc+sex:cage10, data=cirrhosis)
par(mfrow=c(1,2))
new.covariates=data.frame(treat=c(0,0,0,0),sex=c(0,0,0,0),asc=c("0","1","1","2"),cage10=c(0,0,0,0),protgroup=c("4","2","1","1"))
surv.final=survfit(cox.final,newdata=new.covariates)
plot(surv.final,mark.time=F, xlab="Years since randomization",
ylab="Survival",lty=1:4,
main="Prednisone")
legend("topright",c("1","2","3","4"),lty=1:4)
new.covariates=data.frame(treat=c(1,1,1,1),sex=c(0,0,0,0),asc=c("0","1","1","2"),cage10=c(0,0,0,0),protgroup=c("4","2","1","1"))
surv.final=survfit(cox.final,newdata=new.covariates)
plot(surv.final,mark.time=F, xlab="Years since randomization",
ylab="Survival",lty=1:4,
main="Placebo")
legend("topright",c("5","6","7","8"),lty=1:4)