####################### #Data used in analysis# ####################### #data are downloaded from the "National Archive of Criminal Justice Data" website (http://www.icpsr.umich.edu/NACJD/) and then cleaned as needed #obs is the matrix of observations, 3*5 matrix source("data.txt") ##################### #R code for analysis# ##################### #classic model: to get the prior information for the Bayesian model classic_model<-function(obs) {#a stores the complete data (y) matrix a<-matrix(0,17,2) #store the observed data from obs (x) to y matrix a[1,1]<-obs[1,1] a[1,2]<-obs[1,2] a[3,1]<-obs[2,1] a[3,2]<-obs[2,2] a[5,1]<-obs[3,1] a[5,2]<-obs[3,2] a[6,1]<-obs[4,1] a[6,2]<-obs[4,2] a[7,2]<-obs[5,2] #use the mean to filling in the missing data for (i in c(2,4,7)) a[i,1]<-obs[5,1]/3 for (i in c(8,11,14,16)) {j<-(i-8)/3+1 a[i,1]<-obs[j,3]/2 a[i,2]<-obs[j,3]/2 } tmp<-obs[5,3]/10 a[9,1]<-tmp a[12,1]<-tmp for(i in c(10,13,15,17)) {a[i,1]<-tmp a[i,2]<-tmp } #iteration n<-0 #iteration times swit<-1 #control the iteration opo<-0 #store the previously estimated probability oto<-0 oro<-0 odo<-0 opr<-0 opd<-0 ope<-0 opp<-0 opn<-0 ops<-0 opsb<-0 #initial values for probabilities while(swit==1 && n<=1000) { #calculate probabilities npsum<-sum(a[1:7,]) #number of phone interview psum<-sum(a[8:17,]) #number of personal interview po<-psum/(npsum+psum) #p hat rape<-a[1,1]+a[8,1]+a[10,1] #number of rapes that should have been reported when spouse is present nrape<-a[2,1]+a[9,1] #number of rapes not reported because of spouse present ro<-rape/(rape+nrape) #rho hat dom<-a[3,1]+a[11,1]+a[13,1] #number of domestic violence that should have been reported when spous is present ndom<-a[4,1]+a[12,1] #number of domestic violences not reported because of spouse present doo<-dom/(dom+ndom) #delta hat c<-apply(a,1,sum) to<-(c[8]+c[11]+c[14])/(c[8]+c[11]+c[14]+c[10]+c[13]+c[15]) #t hat s<-rep(0,2) #Assume the independence of w matrix, store the column probability p<-rep(0,5) #store the row probability w<-matrix(0,5,2) tot<-sum(a) s[1]<-sum(a[,1])/tot #ps hat: probability that spouse is present during the interview s[2]<-sum(a[,2])/tot #psb hat: probability that spouse is not present during the interview p[1]<-(c[1]+c[2]+c[8]+c[9]+c[10])/tot #probability of rape p[2]<-(c[3]+c[4]+c[11]+c[12]+c[13])/tot #probability of domestic violence p[3]<-(c[5]+c[14]+c[15])/tot #probability of other assault p[4]<-(c[6]+c[16])/tot #probability of personal larceny p[5]<-(c[7]+c[17])/tot #probability of no crime reported for (i in 1:5) #use the independence assumption for the cell probabilities for (j in 1:2) {w[i,j]<-p[i]*s[j]} #w hat(i,j) #check convergence b<-sum(abs(opo-po),abs(oro-ro),abs(odo-doo),abs(oto-to),abs(opr-p[1]),abs(opd-p[2]),abs(ope-p[3]),abs(opp-p[4]),abs(opn-p[5]),abs(ops-s[1])) if (b<=0.0001) #diagonose as converged when the sum of absolute difference is smaller than 0.0001 swit<-0 #store the estimated probabilities of this iteration for next iteration opo<-po oro<-ro odo<-doo oto<-to opr<-p[1] opd<-p[2] ope<-p[3] opp<-p[4] opn<-p[5] ops<-s[1] #obtain the updated complete data counts: E-step w1<-(1-po)*(1-ro)*w[1,1] w2<-(1-po)*(1-doo)*w[2,1] w3<-(1-po)*w[5,1] tmp<-obs[5,1]/(w1+w2+w3) a[2,1]<-tmp*w1 a[4,1]<-tmp*w2 a[7,1]<-tmp*w3 w1<-po*to*ro*w[1,1] w2<-po*to*w[1,2] tmp<-obs[1,3]/(w1+w2) a[8,1]<-w1*tmp a[8,2]<-w2*tmp w1<-po*to*doo*w[2,1] w2<-po*to*w[2,2] tmp<-obs[2,3]/(w1+w2) a[11,1]<-w1*tmp a[11,2]<-w2*tmp w1<-po*to*w[3,1] w2<-po*to*w[3,2] tmp<-obs[3,3]/(w1+w2) a[14,1]<-w1*tmp a[14,2]<-w2*tmp w1<-po*w[4,1] w2<-po*w[4,2] tmp<-obs[4,3]/(w1+w2) a[16,1]<-w1*tmp a[16,2]<-w2*tmp w1<-po*(1-ro)*w[1,1] w2<-po*(1-to)*ro*w[1,1] w3<-po*(1-to)*w[1,2] w4<-po*(1-doo)*w[2,1] w5<-po*doo*(1-to)*w[2,1] w6<-po*(1-to)*w[2,2] w7<-po*(1-to)*w[3,1] w8<-po*(1-to)*w[3,2] w9<-po*w[5,1] w10<-po*w[5,2] tmp<-obs[5,3]/(w1+w2+w3+w4+w5+w6+w7+w8+w9+w10) a[9,1]<-tmp*w1 a[10,1]<-tmp*w2 a[10,2]<-tmp*w3 a[12,1]<-tmp*w4 a[13,1]<-tmp*w5 a[13,2]<-tmp*w6 a[15,1]<-tmp*w7 a[15,2]<-tmp*w8 a[17,1]<-tmp*w9 a[17,2]<-tmp*w10 n<-n+1 #count the iteration number } result<-list(n=n,w=w,rate=apply(w,1,sum)*1000,po=po,to=to,ro=ro,doo=doo,y=a,s=s,p=p) result } #Example classic_model(obs1) #93-97 unweighted data classic_model(obs2) #93-97 weighted data ##################################################################### #emperical bayesian model bayesian_model<-function(obs,pro,pdoo,pto) #pro,pdoo and pto are prior information: the estimated rates from the old data {#a stores the complete data (y) matrix a<-matrix(0,17,2) #store the observed data from obs (x) to y matrix a[1,1]<-obs[1,1] a[1,2]<-obs[1,2] a[3,1]<-obs[2,1] a[3,2]<-obs[2,2] a[5,1]<-obs[3,1] a[5,2]<-obs[3,2] a[6,1]<-obs[4,1] a[6,2]<-obs[4,2] a[7,2]<-obs[5,2] #use the mean to filling in the missing data for (i in c(2,4,7)) a[i,1]<-obs[5,1]/3 for (i in c(8,11,14,16)) {j<-(i-8)/3+1 a[i,1]<-obs[j,3]/2 a[i,2]<-obs[j,3]/2 } tmp<-obs[5,3]/10 a[9,1]<-tmp a[12,1]<-tmp for(i in c(10,13,15,17)) {a[i,1]<-tmp a[i,2]<-tmp } #iteration n<-0 #iteration times swit<-1 #control the iteration opo<-0 #store the previously estimated probability oto<-0 oro<-0 odo<-0 opr<-0 opd<-0 ope<-0 opp<-0 opn<-0 ops<-0 opsb<-0 #initial values for probabilities while(swit==1 && n<=1000) {#calculate probabilities npsum<-sum(a[1:7,]) #number of phone interview psum<-sum(a[8:17,]) #number of personal interview po<-psum/(npsum+psum) #p hat, use the classic method rape<-a[1,1]+a[8,1]+a[10,1] #number of rapes that should have been reported when spous is present nrape<-a[2,1]+a[9,1] #number of rapes not reported because of spouse present ro<-pseudo(rape,nrape,pro) #r hat unweighted: 0.14387, use the pseudo Bayesian method dom<-a[3,1]+a[11,1]+a[13,1] #number of domestic violence that should have been reported when spous is present ndom<-a[4,1]+a[12,1] #number of domestic violences not reported because of spouse present doo<-pseudo(dom,ndom,pdoo) #d hat unweighted: 0.06976028, use the pseudo Bayesian method c<-apply(a,1,sum) tel<-c[8]+c[11]+c[14] ntel<-c[10]+c[13]+c[15] #number of crimes not reported because of telephone interview to<-pseudo(tel,ntel,pto) #t hat unweighted: 0.5277342, use the pseudo Bayesian method s<-rep(0,2) #Assume the independence of w matrix, store the column probability p<-rep(0,5) #store the row probability w<-matrix(0,5,2) tot<-sum(a) s[1]<-sum(a[,1])/tot #ps hat: probability that spouse is present during the interview, classic method s[2]<-sum(a[,2])/tot #psb hat: probability that spouse is not present during the interview p[1]<-(c[1]+c[2]+c[8]+c[9]+c[10])/tot #probability of rape, classic method p[2]<-(c[3]+c[4]+c[11]+c[12]+c[13])/tot #probability of domestic violence p[3]<-(c[5]+c[14]+c[15])/tot #probability of other assault p[4]<-(c[6]+c[16])/tot #probability of personal larceny p[5]<-(c[7]+c[17])/tot #probability of no crime reported for (i in 1:5) #use the independence assumption for the cell probabilities for (j in 1:2) {w[i,j]<-p[i]*s[j]} #w hat(i,j) #check convergence b<-sum(abs(opo-po),abs(oro-ro),abs(odo-doo),abs(oto-to),abs(opr-p[1]),abs(opd-p[2]),abs(ope-p[3]),abs(opp-p[4]),abs(opn-p[5]),abs(ops-s[1])) if (b<=0.0001) #diagonose as converged when the sum of absolute difference is smaller than 0.0001, store the risk when convergence met. {swit<-0 kro<-prisk(rape,nrape,pro) kdo<-prisk(dom,ndom,pdoo) kto<-prisk(tel,ntel,pto) } #store the estimated probabilities of this iteration for next iteration opo<-po oro<-ro odo<-doo oto<-to opr<-p[1] opd<-p[2] ope<-p[3] opp<-p[4] opn<-p[5] ops<-s[1] #obtain the updated complete data counts: E-step w1<-(1-po)*(1-ro)*w[1,1] w2<-(1-po)*(1-doo)*w[2,1] w3<-(1-po)*w[5,1] tmp<-obs[5,1]/(w1+w2+w3) a[2,1]<-tmp*w1 a[4,1]<-tmp*w2 a[7,1]<-tmp*w3 w1<-po*to*ro*w[1,1] w2<-po*to*w[1,2] tmp<-obs[1,3]/(w1+w2) a[8,1]<-w1*tmp a[8,2]<-w2*tmp w1<-po*to*doo*w[2,1] w2<-po*to*w[2,2] tmp<-obs[2,3]/(w1+w2) a[11,1]<-w1*tmp a[11,2]<-w2*tmp w1<-po*to*w[3,1] w2<-po*to*w[3,2] tmp<-obs[3,3]/(w1+w2) a[14,1]<-w1*tmp a[14,2]<-w2*tmp w1<-po*w[4,1] w2<-po*w[4,2] tmp<-obs[4,3]/(w1+w2) a[16,1]<-w1*tmp a[16,2]<-w2*tmp w1<-po*(1-ro)*w[1,1] w2<-po*(1-to)*ro*w[1,1] w3<-po*(1-to)*w[1,2] w4<-po*(1-doo)*w[2,1] w5<-po*doo*(1-to)*w[2,1] w6<-po*(1-to)*w[2,2] w7<-po*(1-to)*w[3,1] w8<-po*(1-to)*w[3,2] w9<-po*w[5,1] w10<-po*w[5,2] tmp<-obs[5,3]/(w1+w2+w3+w4+w5+w6+w7+w8+w9+w10) a[9,1]<-tmp*w1 a[10,1]<-tmp*w2 a[10,2]<-tmp*w3 a[12,1]<-tmp*w4 a[13,1]<-tmp*w5 a[13,2]<-tmp*w6 a[15,1]<-tmp*w7 a[15,2]<-tmp*w8 a[17,1]<-tmp*w9 a[17,2]<-tmp*w10 n<-n+1 #count the iteration number } result<-list(n=n,w=w,rate=apply(w,1,sum)*1000,po=po,to=to,ro=ro,doo=doo,y=a,s=s,p=p,kro=kro,kdo=kdo,kto=kto) result } ###########################emperical-baysian estimator for binomial dist pseudo<-function(x1,x2,p) {N<-x1+x2 k<-2*x1*x2/(x1^2+x2^2-2*N*(x1*p+x2-x2*p)+N^2*(p^2+(1-p)^2)) phat<-N/(N+k)*(x1/N)+k/(N+k)*p phat } pseudo4<-function(x1,x2,x3,x4,p1,p2,p3,p4) {N<-x1+x2+x3+x4 k<-(N^2-x1^2-x2^2-x3^2-x4^2)/(x1^2+x2^2+x3^2+x4^2-2*N*(x1*p1+x2*p2+x3*p3+x4*p4)+N^2*(p1^2+p2^2+p3^2+p4^2)) ph1<-N/(N+k)*(x1/N)+k/(N+k)*p1 ph2<-N/(N+k)*(x2/N)+k/(N+k)*p2 ph3<-N/(N+k)*(x3/N)+k/(N+k)*p3 ph4<-1-ph1-ph2-ph3 c(ph1,ph2,ph3,ph4) } prisk<-function(x1,x2,p) {N<-x1+x2 k<-2*x1*x2/(x1^2+x2^2-2*N*(x1*p+x2-x2*p)+N^2*(p^2+(1-p)^2)) risk<-2*x1*x2/(N+k)^2+k^2/(N*(N+k)^2)*(x1^2+x2^2-2*N*(x1*p+x2-x2*p)+N^2*(p^2+(1-p)^2)) c(risk,k,k/(N+k)) } prisk4<-function(x1,x2,x3,x4,p1,p2,p3,p4) {N<-x1+x2+x3+x4 k<-(N^2-x1^2-x2^2-x3^2-x4^2)/(x1^2+x2^2+x3^2+x4^2-2*N*(x1*p1+x2*p2+x3*p3+x4*p4)+N^2*(p1^2+p2^2+p3^2+p4^2)) risk<-(N^2-x1^2-x2^2-x3^2-x4^2)/(N+k)^2+k^2/(N*(N+k)^2)*(x1^2+x2^2+x3^2+x4^2-2*N*(x1*p1+x2*p2+x3*p3+x4*p4)+N^2*(p1^2+p2^2+p3^2+p4^2)) c(risk,k,k/(N+k)) } #example bayesian_model(obs3,0.14387,0.06976028,0.5277342) #the prior information is from unweighted data 1993-1997 bayesian_model(obs4,0.1399297,0.06491641,0.5347465) #the prior information is from weighted data 1993-1997 ################################################################################ #Jackknife method to calculate the variance of estimation npsu <- 28 #number of psu class_j_n<-rep(0,npsu) #store the results of class_jic model class_j_rate<-matrix(0,npsu,5) class_j_po<-rep(0,npsu) class_j_to<-rep(0,npsu) class_j_ro<-rep(0,npsu) class_j_doo<-rep(0,npsu) class_j_s<-matrix(0,npsu,2) class_j_p<-matrix(0,npsu,5) bayes_j_n<-rep(0,npsu) #store the results of Bayesian model bayes_j_rate<-matrix(0,npsu,5) bayes_j_po<-rep(0,npsu) bayes_j_to<-rep(0,npsu) bayes_j_ro<-rep(0,npsu) bayes_j_doo<-rep(0,npsu) bayes_j_s<-matrix(0,npsu,2) bayes_j_p<-matrix(0,npsu,5) bayes_j_kro<-matrix(0,npsu,3) bayes_j_kdo<-matrix(0,npsu,3) bayes_j_kto<-matrix(0,npsu,3) w_class_j_n<-rep(0,npsu) #store the results of class_jic model for weighted data w_class_j_rate<-matrix(0,npsu,5) w_class_j_po<-rep(0,npsu) w_class_j_to<-rep(0,npsu) w_class_j_ro<-rep(0,npsu) w_class_j_doo<-rep(0,npsu) w_class_j_s<-matrix(0,npsu,2) w_class_j_p<-matrix(0,npsu,5) w_bayes_j_n<-rep(0,npsu) #store the results of bayes_jian model for weighted data w_bayes_j_rate<-matrix(0,npsu,5) w_bayes_j_po<-rep(0,npsu) w_bayes_j_to<-rep(0,npsu) w_bayes_j_ro<-rep(0,npsu) w_bayes_j_doo<-rep(0,npsu) w_bayes_j_s<-matrix(0,npsu,2) w_bayes_j_p<-matrix(0,npsu,5) w_bayes_j_kro<-matrix(0,npsu,3) w_bayes_j_kdo<-matrix(0,npsu,3) w_bayes_j_kto<-matrix(0,npsu,3) temp<-1:28 for (u in 1:npsu) {boots<-temp[-u] # get the Jackknife data sum_temp<-matrix(0,7,6) sum_temp_w<-matrix(0,7,6) for (i in boots) {sum_temp<-sum_temp+ncvs_9804_unweighted[,,i] sum_temp_w<-sum_temp_w+ncvs_9804_weighted[,,i] } group_sum_temp_w<-matrix(0,4,6) group_sum_temp_w[1,]<-sum_temp_w[1,] group_sum_temp_w[2,]<-sum_temp_w[2,]+sum_temp_w[3,] group_sum_temp_w[3,]<-sum_temp_w[4,]+sum_temp_w[5,] group_sum_temp_w[4,]<-sum_temp_w[7,] dimnames(group_sum_temp_w)<-list(c("telephone interview", "spouse present", "spouse not present", "all interviews"), c("#Interviews", "Rape", "Dom_Vio", "Other_Ass", "Per_Lar", "No")) group_sum_temp<-matrix(0,4,6) group_sum_temp[1,]<-sum_temp[1,] group_sum_temp[2,]<-sum_temp[2,]+sum_temp[3,] group_sum_temp[3,]<-sum_temp[4,]+sum_temp[5,] group_sum_temp[4,]<-sum_temp[7,] dimnames(group_sum_temp)<-list(c("telephone interview", "spouse present", "spouse not present", "all interviews"), c("#Interviews", "Rape", "Dom_Vio", "Other_Ass", "Per_Lar", "No")) obs<-t(group_sum_temp)[2:6,c(2,3,1)] #work on the unweighted data result1<-classic_model(obs) class_j_n[u]<-result1$n #store the results of classic model class_j_rate[u,]<-result1$rate class_j_po[u]<-result1$po class_j_to[u]<-result1$to class_j_ro[u]<-result1$ro class_j_doo[u]<-result1$doo class_j_s[u,]<-result1$s class_j_p[u,]<-result1$p result2<- bayesian_model(obs,0.14387,0.06976028,0.5277342) bayes_j_n[u]<-result2$n #store the results of Bayesian model bayes_j_rate[u,]<-result2$rate bayes_j_po[u]<-result2$po bayes_j_to[u]<-result2$to bayes_j_ro[u]<-result2$ro bayes_j_doo[u]<-result2$doo bayes_j_s[u,]<-result2$s bayes_j_p[u,]<-result2$p bayes_j_kro[u,]<-result2$kro bayes_j_kdo[u,]<-result2$kdo bayes_j_kto[u,]<-result2$kto obs<-t(group_sum_temp_w)[2:6,c(2,3,1)] #work on the weighted data result1<-classic_model(obs) w_class_j_n[u]<-result1$n #store the results of classic model w_class_j_rate[u,]<-result1$rate w_class_j_po[u]<-result1$po w_class_j_to[u]<-result1$to w_class_j_ro[u]<-result1$ro w_class_j_doo[u]<-result1$doo w_class_j_s[u,]<-result1$s w_class_j_p[u,]<-result1$p result2<- bayesian_model(obs,0.1407742,0.06924947,0.5345764) w_bayes_j_n[u]<-result2$n #store the results of Bayesian model w_bayes_j_rate[u,]<-result2$rate w_bayes_j_po[u]<-result2$po w_bayes_j_to[u]<-result2$to w_bayes_j_ro[u]<-result2$ro w_bayes_j_doo[u]<-result2$doo w_bayes_j_s[u,]<-result2$s w_bayes_j_p[u,]<-result2$p w_bayes_j_kro[u,]<-result2$kro w_bayes_j_kdo[u,]<-result2$kdo w_bayes_j_kto[u,]<-result2$kto }