# Overview over commands to the trial project

 

# This only gives the R commands for the trial project.

# It is not a draft solution of the project.

# Some comments on how to write a project report was given at the lectures

 

# Read the data into R and attach the survival library

cirrhosis=read.table("http://www.uio.no/studier/emner/matnat/math/STK4080/h14/cirrhosis.txt", header=T)

library(survival)

 

# Define acites as a factor:

cirrhosis$asc=factor(cirrhosis$asc)

 

# Use years as time scale

cirrhosis$time=cirrhosis$time/365.25

 

 

# a)

#Simple univariate analyses for one covariate at a time

 

# Treatment

 

# Nelson-Aalen plots:

fit.naa=coxph(Surv(time,status)~strata(treat),data=cirrhosis)

surv.naa=survfit(fit.naa)

plot(surv.naa,fun="cumhaz", mark.time=F ,lty=1:2, xlab="Years since randomization",ylab="Cumulative hazard", main="Treatment")

legend("topleft",legend=c("Prednisone","Placebo"),lty=1:2)

 

# Kaplan-Meier plots:

fit.km=survfit(Surv(time,status)~treat,data=cirrhosis, conf.type="plain")

plot(fit.km, mark.time=F, lty=1:2, xlab="Years since randomization", ylab="Survival",main="Treatment")

legend("topright",legend=c("Prednisone","Placebo"),lty=1:2)

 

# Estimates of five years survival probabilities with "plain" confidence intervals (alternatively we could have used the option "log-log")

summary(fit.km,time=5)

 

# Estimates of median survival time

print(fit.km)

 

# Alternatively, we may obtain the quartiles by the command:

quantile(fit.km)

 

# Log-rank test:

survdiff(Surv(time,status)~treat,data=cirrhosis)

 

 

# The commands for sex and ascites are similar to the commands for treatment

 

 

# For numeric covariates age and prothrombin index, we have create a categorical covariate (factor) by a "reasonable" grouping

 

# For age

cirrhosis$agegroup=cut(cirrhosis$age,breaks=c(0,49,59,69,100), labels=1:4)

 

# For prothrombin index (approximately according to quartiles)

cirrhosis$protgroup=cut(cirrhosis$prot,breaks=c(0,49,69,89,150), labels=1:4)

 

# After we have obtained categorical covariates for age and prothrombin index, the commands are similar to the ones given above for treatment

 

 

# b) Univariate Cox regressions

 

# Cox regression for treatment

cox.treat=coxph(Surv(time,status)~treat,data=cirrhosis)

summary(cox.treat)

 

# The commands for sex and ascites are similar to the commands for treatment

 

# For the numeric covariates age and prothrombin index, we need to decide how thay should be coded

# (as given on the data file, or suitably transformed, or grouped).

 

# To see how age should be coded, we fit a model using a spline for age:

cox.psage=coxph(Surv(time,status)~pspline(age),data=cirrhosis)

print(cox.psage)

termplot(cox.psage,se=T)

 

# Both the plot (from the templot-command) and the test (from the print-command) show that it is reasonable to assume a log-linear effect of age

 

# We will therefore fit a Cox model using age as a numeric covariate.

# It may be sensible to report the effect of age per 10 years, so we define a new covariate

# where age is given per 10 years and fit a Cox model with this covariate

cirrhosis$age10= cirrhosis$age/10

cox.age10=coxph(Surv(time,status)~age10,data=cirrhosis)

summary(cox.age10)

 

# Then we use a spline fit to see how prothrombin index should be coded

cox.psprot=coxph(Surv(time,status)~pspline(prot),data=cirrhosis)

print(cox.psprot)

termplot(cox.psprot,se=T)

 

# The plot indicates that we do not have a log-linear effect for low ages.

# As there is not easy to find a suitable transformation, we chose to work with grouped prothrombin index

# (The non-linear part of prothrombin index is not significant, so could also be acceptable to use prothrombin index as a numeric covariate)

 

# When using grouped prothrombin index as a categorical covariate, it is natural to use group with highest values as reference:

cirrhosis$protgroup=relevel(cirrhosis$protgroup,ref=4)

cox.protgroup=coxph(Surv(time,status)~protgroup,data=cirrhosis)

summary(cox.protgroup)

 

 

# c) Multivariate Cox regression

 

# We then fit a Cox model with all the covariates

cox.all=coxph(Surv(time,status)~treat+sex+asc+age10+protgroup, data=cirrhosis)

summary(cox.all)

 

# [In principle it may be the case that the coding of a numeric covariate that is appropriate for a univariate analysis,

# is not appropriate for a multivariate analysis and vice versa. But this does not seem to be the case here (commands not shown)]

 

# We then check for first order interactions between any pair of two covariates:

# We may e.g. check for interaction between treatment and sex by the commands:

cox.int=coxph(Surv(time,status)~treat+sex+asc+age10+ protgroup+treat:sex, data=cirrhosis)

anova(cox.all,cox.int)

 

# Similar commands may be used to check other first order interactions

# [Note, however, that for checking interaction between ascites and prothrombin group,

# we need to merge the two highest prothrombin groups (since there is only one person with severe ascites in the highest prothrombin group)]

 

# We find an interaction between treatment and ascites and between sex and age.

# To ease the interpretation of the interaction between sex and the numeric covariate age, it is useful to

# center age by subtracting 60 years (which is close to the mean age)

cirrhosis$cage10=(cirrhosis$age-60)/10

cox.final=coxph(Surv(time,status)~treat+sex+asc+cage10+protgroup+treat:asc+sex:cage10, data=cirrhosis)

summary(cox.final)

 

 

# Log-linearity of the numeric covariates has been checked along the way using splines

# [both for the unvariate and mulivariate Cox models (the latter commands not given here)]

 

# We also need to check for proportional hazards:

cox.test=cox.zph(cox.final,transform='log')

print(cox.test)

par(mfrow=c(2,6))

plot(cox.test)

 

# There are no indication of violation of the assumption of proportional hazard

 

# Make Kaplan-Meier plots for the four groups:

par(mfrow=c(1,1))

plot(survfit(Surv(time,status)~pred.group,data=cirrhosis),mark.time=F,lty=1:4)

 

 

# For illustration we plot the estimated survival function for the following combinations of the covariates:

 

#��� 1) prednisone, female, no ascites , age 60, prothrombin above 90

#��� 2) prednisone, female, slight ascites , age 60, prothrombin 50-69

#��� 3) prednisone, female, slight ascites , age 60, prothrombin below 50

#��� 4) prednisone, female, marked ascites , age 60, prothrombin below 50

#��� 5) placebo, female, no ascites , age 60, prothrombin above 90

#��� 6) placebo, female, slight ascites , age 60, prothrombin 50-69

#��� 7) placebo, female, slight ascites , age 60, prothrombin below 50

#��� 8) placebo, female, marked ascites , age 60, prothrombin below 50

 

# Note that the levels of factors are given in quoteswhen defining the new covariate values below

 

cox.final=coxph(Surv(time,status)~treat+sex+asc+cage10+protgroup +treat:asc+sex:cage10, data=cirrhosis)

par(mfrow=c(1,2))

new.covariates=data.frame(treat=c(0,0,0,0),sex=c(0,0,0,0),asc=c("0","1","1","2"),cage10=c(0,0,0,0),protgroup=c("4","2","1","1"))

surv.final=survfit(cox.final,newdata=new.covariates)

plot(surv.final,mark.time=F, xlab="Years since randomization", ylab="Survival",lty=1:4, main="Prednisone")

legend("topright",c("1","2","3","4"),lty=1:4)

new.covariates=data.frame(treat=c(1,1,1,1),sex=c(0,0,0,0),asc=c("0","1","1","2"),cage10=c(0,0,0,0),protgroup=c("4","2","1","1"))

surv.final=survfit(cox.final,newdata=new.covariates)

plot(surv.final,mark.time=F, xlab="Years since randomization", ylab="Survival",lty=1:4, main="Placebo")

legend("topright",c("5","6","7","8"),lty=1:4)