# S-PLUS code for the detection of spatial clustering program # called MEET (Maximized Excess Events Test) # # August 2002 (version 2.00) # # Programmed by # # Toshiro Tango # Department of Technology Assessment and Biostatistics # National Institute of Public Health # 2-3-6 Minami, Wako, Saitama, 351-0197, Japan # E-mail: tango@niph.go.jp # # ---------------------------------------------------------------- # ( README FIRST ) # 1. Please read my paper carefully before using this program. # 2. You are free to use this program for noncommercial purposes # even to modify it but please cite this original program # # Reference: # Tango, T. A test for spatial disease clustering adjusted for # multiple testing. Statistics in Medicine 19, 191-204 (2000). # ----------------------------------------------------------------- # INPUT: # # nloop: The number of Monte Carlo replications, e.g., 999, 9999. # us$x: x-value of region centroid, e.g., longitude # us$y: y-value of region centroid, e.g., latitude # dc: distance matrix calculated from us$x and us$y # ( Depending on the user's situation, this should be modified !) # dat$j: j-th category of the confounders included (j=1,2,...,nk ) # dat$d: observed number of cases # dat$e: expected number of cases ( or population size) # kvec: a sequence of lambda values used for the profile p-values # # ------------------------------------------------------------------ # Major variables: # # nk: the number of strata or categories of all the confounders # included. For example, if age and sex are the confounders # included, with 18 different age groups, then there should be # 18x2 = 36 ( = nk ) categories. # nr: the number of regions # nn: the number of cases # kkk: the number of lambda values # pxx: sorted p-values due to Monte Carlo replications under # the null # ss : per cent contribution to C (equation 14) # top: region id sorted in the order of the magnitude of ss # (e.g., top[1] denote the most likely location of cluster, # if any.) # pres: The resultant adjusted p-value based on Monte Carlo # replicates # # +++++ An example of execution ++++++++++++++++++++++++++++++++++++ # # 100 cases: the random sample from clustering model A shown in # Figure 2 of my paper. The result will be the one # similar to Figure 3 and 4. The difference will be # due to Monte Carlo replicates produced. # These data files are added at the end of email. # # nloop<-999 nk<- 1 # # The kvec below is set only for the example data: # Depending on the data, it should be provided by the user # kvec<- c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20) kvec<- kvec*2 # us <-scan("d:/demo/kanto.geo",list(id=0,x=0,y=0)) dat <-scan("d:/demo/kanto.dat",list(id=0,j=0,d=0,e=0)) # # These two data files are added at the end of this file. # It takes about 13 minutes using IBM thinkpad 600E. # # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ par(mfrow=c(1,1)) start<-date() nr<-length(us$id) qdat<-rep(0,nr) p<-rep(0,nr) w<-matrix(0,nr,nr) nn<-sum(dat$d) kkk<- length(kvec) dc<-matrix(0,nr,nr) regid<-us$id frq<-matrix(0,nloop,nr) pval<-matrix(0,nloop,kkk) pxx<-rep(0,nloop) pcc<-pxx for (i in 1:nr){ for (j in i:nr){ dij<-sqrt(((us$x[i]-us$x[j])*90.15)**2 + # An example ((us$y[i]-us$y[j])*110.9)**2) # for Tokyo area # dij<-sqrt(((us$x[i]-us$x[j]))**2 + # ((us$y[i]-us$y[j]))**2) dc[j,i]<- dij dc[i,j]<- dij } } # # Calculation of (1) null distribution of test statistic Pmin # for (j in 1:nk){ jdat<- dat$d[dat$j == j] nj<-sum(jdat) poo<-dat$e[dat$j == j] p1<-poo/sum(poo) pp<-matrix(p1) w1<-diag(p1)-pp%*%t(pp) qdat<- qdat + jdat p<- p+nj*p1 w<- w+nj*w1 py<-p1 vv<-py/sum(py) cvv<-cumsum(vv) for (ijk in 1:nloop) { r1<-hist(runif(nj),breaks=c(0,cvv),plot=F) frq[ijk,]<- frq[ijk,] + r1$count } } frq<-frq/nn ori<- qdat qdat<-qdat/nn w<- w/nn p<-p/nn symbols(us$x,us$y,circles=p,inch=0.50) # Figure 1 text(us$x,us$y,regid,adj= -0.5, col=8) # for (k in 1:kkk){ rad<- kvec[k] ac <- exp( -4 * (dc/rad)^2 ) hh <- ac%*%w hh2<- hh%*%hh av <- sum(diag( hh )) av2<- sum(diag( hh2 )) av3<- sum(diag( hh2%*%hh )) skew1<- 2*sqrt(2)*av3/ (av2)^1.5 df1<- 8/skew1/skew1 eg1<-av vg1<-2*av2 for (ijk in 1:nloop){ q<-frq[ijk,] gt<-(q-p)%*%ac%*%(q-p)*nn stat1<-(gt-eg1)/sqrt(vg1) aprox<-df1+sqrt(2*df1)*stat1 pval[ijk,k]<- 1-pchisq(aprox, df1) } } for (ijk in 1:nloop){ pcc[ijk]<-min( pval[ijk,] ) } pxx<- sort( pcc ) # # Calculation of (2) test statistic Pmin and its adjusted p-value # of observed data # ptan<-rep(0,kkk) gtt<-rep(0,kkk) jjj<-1:kkk ep<-p*nn po<-ppois(ori,ep) pa<-(po-0.90)/0.10 p1<-ifelse(po > 0.9 & ori>0, pa^3, 0) symbols(us$x,us$y,circle=p1,inch=0.2) # Figure 2 text(us$x, us$y, ori, col=8) par(cex=0.5) par(mar=c(4,3,3,5)) par(mfrow=c(1,2)) par(mgp=c(1.5,0.5,0)) # for (k in 1:kkk) { ti<-date() rad<- kvec[k] ac <- exp( -4 * (dc/rad)^2 ) hh <- ac%*%w hh2<- hh%*%hh av <- sum(diag( hh )) av2<- sum(diag( hh2 )) av3<- sum(diag( hh2%*%hh )) skew1<- 2*sqrt(2)*av3/ (av2)^1.5 df1<- 8/skew1/skew1 eg1<-av vg1<-2*av2 gt<-(qdat-p)%*%ac%*%(qdat-p)*nn gtt[k]<-gt stat1<-(gt-eg1)/sqrt(vg1) aprox<-df1+sqrt(2*df1)*stat1 ptan[k]<- 1-pchisq(aprox, df1) } # # Figure 3 p22<-min( ptan ) at<-c(pxx,p22) pres<-length(at[at<=p22])/(length(pxx)+1) pind<-jjj[ptan==min(ptan)] plot(kvec,ptan,pch=1,type="b",xlab="cluster size ( lambda values ) ", ylab=" Unadjusted p-values") abline(v=kvec[pind]) text((max(kvec)+2*min(kvec))/3,(3*max(ptan)+min(ptan))/4, "Adjusted p-value = ",col=8) text((1.5*max(kvec)+min(kvec))/2.5,(3*max(ptan)+min(ptan))/4, pres,col=8) # # Figure 4 rad<- kvec[pind] ac <- exp( -4 * (dc/rad)^2 ) ss<-ac%*%(qdat-p)*nn ss<-(qdat-p)*ss ss2<-ss ss<-ss/sum(ss)*100 plot(regid,ss,pch=1,xlab=" region ID ", ylab=" Percent contribution ") text(regid+2,ss+1,regid,col=8) top<-regid[sort.list(-ss)] end<- date() # # End of S-plus file # ---------------------------------------------------------------- # Below are two data sets for example run # # example data : kanto.geo (id, longitude, latitude) # 1 139.755 35.69056 2 139.7753 35.6675 3 139.7544 35.655 4 139.7075 35.68972 5 139.7564 35.70472 6 139.7844 35.70889 7 139.7989 35.69472 8 139.8197 35.66833 9 139.7333 35.60583 10 139.6956 35.62889 11 139.7236 35.57611 12 139.6564 35.64278 13 139.7014 35.66083 14 139.6669 35.70361 15 139.6397 35.69639 16 139.7186 35.72917 17 139.7369 35.74944 18 139.7867 35.72444 19 139.7122 35.74778 20 139.6553 35.7325 21 139.8036 35.74389 22 139.8508 35.74 23 139.8717 35.70333 24 139.3192 35.66333 25 139.4225 35.69083 26 139.5694 35.71472 27 139.5625 35.68 28 139.2781 35.785 29 139.4808 35.66556 30 139.3667 35.70806 31 139.5442 35.6475 32 139.45 35.54528 33 139.5064 35.69611 34 139.4806 35.72528 35 139.3983 35.66806 36 139.4717 35.75139 37 139.4656 35.70778 38 139.4444 35.68056 39 139.5417 35.7225 40 139.5622 35.73833 41 139.33 35.73556 42 139.5819 35.63167 43 139.4297 35.74222 44 139.5297 35.7825 45 139.5231 35.75306 46 139.3906 35.75167 47 139.4497 35.63389 48 139.5078 35.63472 49 139.2969 35.72611 50 139.3142 35.76417 51 139.3572 35.76889 52 139.2361 35.74139 53 139.2233 35.72667 54 139.1519 35.72361 55 139.0994 35.80639 56 139.6856 35.50556 57 139.6328 35.47389 58 139.6197 35.45056 59 139.6453 35.44139 60 139.6119 35.42833 61 139.5992 35.45667 62 139.6219 35.39917 63 139.6278 35.33417 64 139.6364 35.51583 65 139.5358 35.39306 66 139.5944 35.39722 67 139.5481 35.47139 68 139.5411 35.50917 69 139.5022 35.46306 70 139.5569 35.36139 71 139.4917 35.41444 72 139.7072 35.52639 73 139.6906 35.54056 74 139.6592 35.57306 75 139.6108 35.59639 76 139.5756 35.61694 77 139.5825 35.58611 78 139.5094 35.60083 79 139.6753 35.27889 80 139.3531 35.33222 81 139.5511 35.31528 82 139.4947 35.33583 83 139.1556 35.26139 84 139.4078 35.33056 85 139.5833 35.29222 86 139.3764 35.56806 87 139.6239 35.14083 88 139.2233 35.37139 89 139.3656 35.44 90 139.4611 35.48417 91 139.3183 35.39944 92 139.3986 35.44694 93 139.3975 35.48111 94 139.1031 35.3175 95 139.4325 35.43361 96 139.59 35.26917 97 139.3869 35.36972 98 139.3144 35.30361 99 139.2581 35.29639 100 139.2219 35.3275 101 139.1594 35.32361 102 139.1425 35.345 103 139.0864 35.35917 104 139.1264 35.33306 105 139.1097 35.22944 106 139.1406 35.155 107 139.1114 35.14472 108 139.3247 35.52583 109 139.2797 35.47889 110 139.3061 35.5925 111 139.2594 35.58306 112 139.1922 35.61111 113 139.1586 35.61167 # # example data: kanto.dat (id, stratum no, number of cases, population) # 1 1 0 39472 2 1 0 68041 3 1 0 158499 4 1 2 296790 5 1 2 181269 6 1 1 162969 7 1 0 222944 8 1 3 385159 9 1 4 344611 10 1 2 251222 11 1 1 647914 12 1 3 789051 13 1 0 205625 14 1 6 319687 15 1 8 529485 16 1 0 261870 17 1 2 354647 18 1 0 184809 19 1 2 518943 20 1 8 618663 21 1 4 631163 22 1 0 424801 23 1 1 565939 24 1 2 466347 25 1 0 152824 26 1 2 139077 27 1 0 165564 28 1 1 125960 29 1 0 209396 30 1 0 105372 31 1 0 197677 32 1 2 349050 33 1 0 105899 34 1 0 164013 35 1 0 165928 36 1 1 134002 37 1 0 100982 38 1 1 65833 39 1 0 75144 40 1 1 95146 41 1 0 58062 42 1 2 74189 43 1 0 75132 44 1 1 67539 45 1 0 113818 46 1 0 65562 47 1 2 144489 48 1 1 58635 49 1 0 50387 50 1 1 52103 51 1 0 30967 52 1 0 16444 53 1 0 21553 54 1 0 3808 55 1 0 8752 56 1 1 250100 57 1 0 205510 58 1 0 76978 59 1 0 116634 60 1 0 194617 61 1 2 195795 62 1 1 168846 63 1 0 197753 64 1 3 305774 65 1 1 238536 66 1 1 224036 67 1 0 248882 68 1 2 426663 69 1 0 119575 70 1 0 123766 71 1 2 126866 72 1 0 200056 73 1 1 142320 74 1 0 187707 75 1 0 165081 76 1 0 175570 77 1 2 177742 78 1 0 125127 79 1 3 433358 80 1 0 245950 81 1 1 174307 82 1 1 350330 83 1 1 193417 84 1 0 201675 85 1 1 56704 86 1 1 531542 87 1 1 52440 88 1 1 155620 89 1 0 197283 90 1 1 194866 91 1 0 89567 92 1 1 105822 93 1 1 112102 94 1 1 42600 95 1 0 77926 96 1 0 29536 97 1 0 44532 98 1 0 31599 99 1 1 29415 100 1 0 10054 101 1 0 14895 102 1 0 13097 103 1 0 14342 104 1 0 11941 105 1 0 19365 106 1 0 9588 107 1 0 27717 108 1 2 40424 109 1 0 3549 110 1 0 21535 111 1 0 28038 112 1 0 10592 113 1 1 10729 # -------------- End of all files ----------------------