RunsTest <- function(Y,show.cdf=FALSE,p0=0.005) # For a binary data vector Y # (logical or numeric, two different values of the components), # this program computes the left-, right- and two-sided exact # permutation P-value for the null hypothesis that Y is (in # distribution) exchangeable, based on the test statistic # T(Y) := sum(Y[1:(n-1)] != Y[2:n]) # with n = length(Y). # Note that this is NOT the runs test based on the binomial # distribution Bin(n-1, 1/2). Instead we are working with # the conditional distribution of T(Y), given the two # frequencies # #{i : Y[i] == max(Y)}, #{i : Y[i] == min(Y)} . # # Lutz Duembgen, May 30, 2011 { n <- length(Y) S <- sum(Y == max(Y)) W <- sum(Y == min(Y)) if (S + W != n) { print("Error: data vector is not a truely binary vector!") return() } T <- sum(Y[1:(n-1)] != Y[2:n]) F <- LocalRunsDistr(S,W); pwl <- F[T] if (T == 1) { pwr <- 1 } else { pwr <- 1 - F[T-1] } pwz <- min(2*min(pwl,pwr),1) if (show.cdf) { x1 <- 1 while(F[x1] < p0) { x1 <- x1+1 } x2 <- x1 while(F[x2] < 1-p0) { x2 <- x2+1 } plot(stepfun(x=(1:length(F)), y=c(0,F)), verticals=T,pch="", xlim=c(x1,x2),ylim=c(0,1), xlab="t",ylab="F(t)", main="CDF of T(Pi(Y)), given Y") } return(list(runs.statistic=T, frequencies=c(S,W), leftsided.p.value=pwl, rightsided.p.value=pwr, twosided.p.value=pwz)) } LocalRunsDistr <- function(S,W) # computes the vector F with entries # F[t] = Prob(T(Pi(Y)) <= t | Y), # where Pi(Y) is a random permutation of Y. { if (S == 1) { HS <- 0 } else { HS <- c(0, cumsum(log((S-1):1) - log(1:(S-1)))) # HS[k] = log {S-1 choose k-1} } if (W == 1) { HW <- 0 } else { HW <- c(0, cumsum(log((W-1):1) - log(1:(W-1)))) # HW[k] = log {W-1 choose k-1} } MSW <- min(S,W); # Determine support of the distribution: if (S == W) { t <- (1:(2*MSW-1)) } else { t <- (1:(2*MSW)) } F <- rep(0,times=length(t)) # At first, F contains the log-weights of the distribution # up to an additive constant: F[seq(1,(2*MSW-1),by=2)] <- log(2) + HS[1:MSW] + HW[1:MSW] tmp1 <- HS[1:(MSW-1)] + HW[2:MSW] tmp2 <- HS[2:MSW] + HW[1:(MSW-1)] F[seq(2,(2*MSW-2),by=2)] <- pmax(tmp1,tmp2) + log(1+exp(-abs(tmp1-tmp2))) if (S > W) { F[2*MSW] <- HS[MSW+1] + HW[MSW] } if (S < W) { F[2*MSW] <- HS[MSW] + HW[MSW+1] } # Transformation of F to a distribution function: F <- exp(F - max(F)) F <- cumsum(F) F = F/F[length(F)] return(F) }