# Symmetrized M-estimators of scatter # (based on multivariate t-distributions) # # This file contains an implementation of an algorithm # described by # # L. Duembgen, K. Nordhausen and H. Schuhmacher: # New Algorithms for M-Estimation of Multivariate # Scatter and Location. # Preprint 2013/2014, http://arxiv.org/abs/1312.6489. # # It requires also MVTMLE.R. # # Lutz Duembgen, Klaus Nordhausen # May 19, 2015 source("MVTMLE.R") MVTMLE_symm <- function(X,nu=0,delta=10^(-7), prewhitened=FALSE, steps=TRUE,nmax=500) # Computes symmetrized M-estimator of scatter. { Xnames <- colnames(X) Xs <- as.matrix(X) n <- dim(Xs)[1] p <- dim(Xs)[2] if (n <= p) { return("Too few observations!") } if (!prewhitened) { # Prewhiten the data: S0 <- 2*cov(Xs) tmp <- eigen(S0) tmpv <- sqrt(tmp$values) B <- t(t(tmp$vectors)*tmpv) Xs <- Xs %*% tmp$vectors Xs <- t(t(Xs)/tmpv) } else { B <- diag(1,p) } # Phase 1: Use a random subset of n # observation pairs to get a good # starting value: if (steps) { print("Phase 1 (using a random subset of n pairs):", quote=FALSE) } Pi <- sample(n) X0 <- Xs[Pi,] - Xs[Pi[c(2:n,1)],] C <- MVTMLE0(X0,nu,delta=sqrt(delta), steps=steps,prewhitened=TRUE)$B B <- B %*% C Xs <- t(qr.solve(C,t(Xs))) # Phase 2: if (steps) { print("Phase 2 (using all N pairs):", quote=FALSE) } if (n <= nmax) # Use a big data matrix with # N observation pairs: { IJ <- combn(n,2) XXs <- Xs[IJ[1,],] - Xs[IJ[2,],] tmp <- MVTMLE0(XXs,nu,delta=delta, prewhitened=TRUE,steps=steps) B <- B %*% tmp$B iter <- tmp$iter } else # Avoid the storage of N observation # pairs and perform MVTMLE0 "in loops": { N <- n*(n-1)/2 Psi <- LocalPsi(Xs,nu,n,p,N) Tmp <- eigen(Psi,symmetric=TRUE) nG <- sqrt(sum((1 - Tmp$values)^2)) iter <- 0 while (nG > delta && iter < 1000) { iter <- iter + 1 B <- B %*% Tmp$vectors Xs <- Xs %*% Tmp$vectors a <- LocalA(Xs,nu,n,p,N,Tmp$values) # Check whether the new matrix # parameter is really better: a.half <- a/2 Xs.new <- t(t(Xs)*exp(-a.half)) DL <- LocalDL(Xs,nu,n,p,N,Xs.new,a) DL0 <- sum(a*(1 - Tmp$values))/4 if (steps) { print(cbind(Before.iteration=iter, Norm.of.gradient=nG, Diff.M2LL.with.PN=DL)) } if (DL <= DL0) { B <- t(t(B) * exp(a.half)) Xs <- Xs.new } else # perform a step of the classical # fixed-point algorithm: { sqrtTmpValues <- sqrt(Tmp$values) B <- t(t(B) * sqrtTmpValues) Xs <- t(t(Xs) / sqrtTmpValues) } Psi <- LocalPsi(Xs,nu,n,p,N) Tmp <- eigen(Psi,symmetric=TRUE) nG <- sqrt(sum((1 - Tmp$values)^2)) } if (steps) { print(cbind(After.iteration=iter, Norm.of.gradient=nG)) } } S <- tcrossprod(B) if (nu == 0) { S <- S / det(S)^(1/p) } rownames(S) <- colnames(S) <- Xnames return(list(B=B,S=S,iter=iter)) } LocalPsi <- function(X,nu,n,p,N) { Psi <- matrix(0,p,p) dmax <- floor((n-1)/2) for (d in 1:dmax) { jj <- c((d+1):n,1:d) Y <- X - X[jj,] denom <- nu + rowSums(Y^2) Z <- Y/sqrt(denom) Psi <- Psi + crossprod(Z) } if (n%%2 == 0) { Y <- X[1:(n%/%2),] - X[(n%/%2+1):n,] denom <- nu + rowSums(Y^2) Z <- Y/sqrt(denom) Psi <- Psi + crossprod(Z) } Psi <- Psi * ((nu + p)/N) return(Psi) } LocalA <- function(X,nu,n,p,N,evs) { Ht <- matrix(0,p,p) dmax <- floor((n-1)/2) for (d in 1:dmax) { jj <- c((d+1):n,1:d) Y <- X - X[jj,] denom <- nu + rowSums(Y^2) Z <- Y^2/denom Ht <- Ht + crossprod(Z) } if (n%%2 == 0) { Y <- X[1:(n%/%2),] - X[(n%/%2+1):n,] denom <- nu + rowSums(Y^2) Z <- Y^2/denom Ht <- Ht + crossprod(Z) } Ht <- diag(evs) - Ht * ((nu + p)/N) + (nu == 0)/p a <- qr.solve(Ht,evs - 1) return(a) } LocalDL <- function(X,nu,n,p,N,Xnew,a) { DL <- 0 dmax <- floor((n-1)/2) for (d in 1:dmax) { jj <- c((d+1):n,1:d) Y <- X - X[jj,] z <- nu + rowSums(Y^2) Ynew <- Xnew - Xnew[jj,] znew <- nu + rowSums(Ynew^2) DL <- DL + sum(log(znew/z)) } if (n%%2 == 0) { Y <- X[1:(n%/%2),] - X[(n%/%2+1):n,] z <- nu + rowSums(Y^2) Ynew <- Xnew[1:(n%/%2),] - Xnew[(n%/%2+1):n,] znew <- nu + rowSums(Ynew^2) DL <- DL + sum(log(znew/z)) } DL <- DL * ((nu + p)/N) + sum(a) return(DL) }