#claims<-read.table("claims.txt",header=T) claims<-read.table("http://www.uio.no/studier/emner/matnat/math/STK3100/h10/undervisningsmateriale/claims.txt",header=T,row.names=NULL) #colnames(claims)<-c("age","volume","district","insured","claims") # #e) mod1<-glm(claims~factor(age)+factor(volume)+factor(district)+offset(log(insured)),family=poisson,data=claims)# kanonisk link: log summary(mod1) mod2<-update(mod1, ~. +factor(age)*factor(volume))# add interaction between age and volume mod3<-update(mod1, ~. +factor(age)*factor(district))#add interaction between age and volume mod4<-update(mod1, ~. +factor(district)*factor(volume))#add interaction between age and volume anova.glm(mod1,mod2) anova.glm(mod1,mod3) anova.glm(mod1,mod4)# no gain in adding interactions #f) summary(mod1) #g) l1<-exp(mod1$coeff[2:4]+qnorm(0.025)*sqrt(diag(vcov(mod1)[2:4,2:4]))) u1<-exp(mod1$coeff[2:4]+qnorm(0.975)*sqrt(diag(vcov(mod1)[2:4,2:4]))) # rate ratios for age w.r.t reference (under 25) l2<-exp(mod1$coeff[5:7]+qnorm(0.025)*sqrt(diag(vcov(mod1)[5:7,5:7]))) u2<-exp(mod1$coeff[5:7]+qnorm(0.975)*sqrt(diag(vcov(mod1)[5:7,5:7]))) # rate ratios for volume w.r.t reference (under 1 liter) l3<-exp(mod1$coeff[8:10]+qnorm(0.025)*sqrt(diag(vcov(mod1)[8:10,8:10]))) u3<-exp(mod1$coeff[8:10]+qnorm(0.975)*sqrt(diag(vcov(mod1)[8:10,8:10]))) ci<-cbind(exp(mod1$coeff[2:10]),c(l1,l2,l3),c(u1,u2,u3)) # rate ratios for district w.r.t reference (district 1) #h) mod5<-glm(claims~age+volume+factor(district)+offset(log(insured)),family=poisson,data=claims)# kanonisk link: log summary(mod5)# model with age and motor volumes as quantitative (numerical) covariates anova.glm(mod5,mod1) claims$bindis<-claims$district claims$bindis[claims$bindis ==2]<-1 claims$bindis[claims$bindis ==3]<-1 claims$bindis[claims$bindis == 4]<-2 mod6<-glm(claims~age+volume+factor(bindis)+offset(log(insured)),family=poisson,data=claims)# kanonisk link: log summary(mod6)# model with age and motor volumes as quantitative (numerical) covariates anova.glm(mod6,mod5) #i) resp<-residuals(mod6,"pearson") resd<-residuals(mod6,"deviance") crossprod(resp,resp)# Pearson's chi2, 4x4x4-4=60 degrees of fredom plot(mod6$fitted,resd,xlab="fitted values", ylab="residuals") #variance seems to larger for small fitted values plot(claims$age,resd,xlab="age", ylab="residuals") plot(claims$volume,resd,xlab="volume", ylab="residuals")# plot(claims$district,resd,xlab="district", ylab="residuals") #j) aux<-mod6$coeff[1]+mod6$coeff[2]*2 aux<-aux + mod6$coeff[3]*3+mod6$coeff[4] # second category age, volume and bindis # 39 insured in this group ra1<-39*exp(aux) ra2<-1000*exp(aux)# rate pr 1000 c<-as.matrix(c(1,2,3,1)) var<-t(c)%*%vcov(mod6)%*%c lb<-exp(aux-1.96*sqrt(var)) ub<-exp(aux+1.96*sqrt(var)) c(lb,ub)# approximate 95% c. i. #k) # negative binomial library(MASS) #mod7<-glm.nb(claims~age+volume+factor(bindis)+offset(log(insured)),glm.control(maxit=100),data=claims) mod7<-glm.nb(claims~age+volume+factor(bindis)+offset(log(insured)),data=claims) summary(mod7) cbind(mod6$coeff,mod7$coeff) anova(mod6,mod7)# no evidence of overdispersion, marginal improvement of likelihood #k) newv<-claims$claims/claims$insured claims<-data.frame(claims,newv) mod8<-glm(newv~age+volume+factor(bindis),weights=insured,family=poisson(link="log"),data=claims)# summary(mod8) %resp_8<-residuals(mod8,"pearson") %resd_8<-residuals(mod8,"deviance") %plot(mod8$fitted,resd_8,xlab="fitted values", ylab="residuals") mod9<-glm(newv~age+volume+factor(bindis),weights=insured,family=quasi(link="log",variance="mu"),data=claims)# summary(mod9) cbind(mod6$coeff,mod8$coeff,mod9$coeff) #l) mod10<-glm(newv~age+volume+factor(bindis),weights=insured,family=quasi(link="power(0.25)",variance="mu"),data=claims)# summary(mod10) mod11<-glm(newv~age+volume+factor(bindis),weights=insured,family=quasi(link="power(0.50)",variance="mu"),data=claims)# summary(mod11) mod12<-glm(newv~age+volume+factor(bindis),weights=insured,family=quasi(link="power(0.10)",variance="mu"),data=claims)# summary(mod12) #mod10b<-glm(newv~age+volume+factor(bindis),weights=insured,family=poisson(link="power(0.50)"),data=claims)# #summary(mod10b) cbind(mod8$coeff,mod12$coeff,mod10$coeff,mod11$coeff) eta1<-c((mod10$coeff[1]-1)/0.25,mod10$coeff[2]/0.25,mod10$coeff[3]/0.25,mod10$coeff[4]/0.25) eta2<-c((mod11$coeff[1]-1)/0.5,mod11$coeff[2]/0.5,mod11$coeff[3]/0.5,mod11$coeff[4]/0.5) eta3<-c((mod12$coeff[1]-1)/0.1,mod12$coeff[2]/0.1,mod12$coeff[2]/0.1,mod12$coeff[4]/0.1) cbind(mod8$coeff,eta3,eta1,eta2) resd<-residuals(mod10,"deviance") plot(mod10$fitted,resd,xlab="fitted values", ylab="residuals")