KalmanEM.R

Revision 2 - 1/27/09 at 1:59 pm by e2holmes

Previous revision | Next rev
Back to revision history for KalmanEM.R
This file is part of the project Kalman-EM
KalmanEM = function(y, whichPop=NA, Uinit=NA, Qinit=NA, Ainit=NA, Rinit=NA, x00=NA, V00init=NA, max.iter=5000, varcov.Q="unconstrained", varcov.R="diagonal", U.groups=NA, Q.groups=NA, R.groups=NA, U.bounds=c(-1,1), logQ.bounds = c(log(1.0e-05),log(1.0)), logR.bounds = c(log(1.0e-05),log(1.0)), estInteractions=FALSE, MonteCarloInit = FALSE, numInits = 500, numInitSteps = 10, tol=0.01, silent=FALSE) 
{                                                                                                                                                                                                                                                                                       

# Code written by Eric Ward (EW) and Eli Holmes (EH) 
#         adapted from Matlab code by Eli Holmes/Rich Hinrichsen and R code from Shumway and Stoffer (2006)
# A description of the algorithm (for the univariate case) is in "An EM algorithm.pdf"
# July 4, 08, EH added estimation A bias term
# July 7, 08  EW added arguments whichPop (nx1), R.groups (nx1), U.groups (mx1) and Q.groups (mx1)
# July 12, 08 EH cleaned up error checking
# July 19, 08 EW added a few catches to trap cases where A is never estimated
# Aug 06, 08 EW added names, so that strings may be passed in for groups / sites / species (names also display on output)
# Nov 08 EW added bootstrapping functions
############## Things that the function needs passed in
###########################################################
# y is the LOG TRANSFORMED data with -99 for MISSING VALS; each column is a separate observation time series; time is down the rox
y <- t(y)        #for consistency, all data files are assumed to have time going down rows; this algorithm wants time across cols
# Initial conditions for the algorithm are passed in as vectors Uinit,Qinit,Rinit,x00
# max.iter is the number of iterations to do
# tol is the tolerence for checking if convergence has happened; lower if convergence happens in just a few iterations
# whichPop, a 1xn vector assigning observation time series to state processes
# varcov.Q and varcov.R specify the structures allowed for Q & R: "unconstrained", "diagonal" or "equalvarcov"
# R.groups is a nx1 vector specifying which observation variances are shared across different state processes; default is all unique
# U.groups is a mx1 vector specifying which u's are shared across different state processes; default is all unique
# Q.groups is a mx1 vector specifying which process variances are shared across different state processes; default is all unique

############# Check that package MASS is installed and load it if it isn't already loaded
###########################################################
if(require(MASS)==FALSE) {
  str1 = "The KalmanEM function requires the package MASS and you haven't installed it yet.\n"
	str2 = "Install the package before continuing by\n"
	str3 = "going to the Packages tab and selecting install (if working from the R GUI).\n"
      cat(paste(str1,str2,str3,sep=""))	
	stop("missing MASS package")
}

############# Set the values for whichPop, Q.groups, U.groups, R.groups and inits if they were not passed in
###########################################################
  U.names=U.groups
  if(is.numeric(U.groups)==FALSE) U.groups = as.numeric(as.factor(U.groups))
  R.names=R.groups
  if(is.numeric(R.groups)==FALSE) R.groups = as.numeric(as.factor(R.groups))
  Q.names=Q.groups
  if(is.numeric(Q.groups)==FALSE) Q.groups = as.numeric(as.factor(Q.groups))
  n = dim(y)[1]  
  default = length(whichPop)==1 & is.na(whichPop[1]); if(default) whichPop = seq(1,n)
  m = max(whichPop) 
  default = length(Q.groups) == 1 & is.na(Q.groups[1]); if(default) Q.groups = seq(1,m)
  default = length(U.groups) == 1 & is.na(U.groups[1]); if(default) U.groups = seq(1,m) 
  default = length(R.groups) == 1 & is.na(R.groups[1]); if(default) R.groups = seq(1,n) 
  default = length(Uinit)==1 & is.na(Uinit[1]); if(default) Uinit = rep(0,m)
  default = length(Qinit)==1 & is.na(Qinit[1]); if(default) Qinit = rep(0.05,m)
  default = length(Ainit)==1 & is.na(Ainit[1]); if(default) Ainit  = rep(0,n)
  default = length(Rinit)==1 & is.na(Rinit[1]); if(default) Rinit = rep(0.05,n)
  default = length(x00)==1 & is.na(x00[1]); 
  tmp = y; tmp[tmp==-99] = NA;
  if(default) x00 = rep(mean(tmp,na.rm=T), m)  #this use the mean over all the observations for all observation ts
  default = length(V00init)==1 & is.na(V00init[1]); if(default) V00init = rep(1,m);
  if(length(which(is.na(U.names)==TRUE))) U.names=U.groups
  if(length(which(is.na(Q.names)==TRUE))) Q.names=Q.groups
  if(length(which(is.na(R.names)==TRUE))) R.names=R.groups

############ Error check to make sure user did not specify an illegal or inconsistent model
###########################################################
if(FailedErrorCheck(y, whichPop, U.groups, R.groups, Q.groups, varcov.Q, varcov.R,Uinit,Qinit,Ainit,Rinit,x00)) 
	stop("Model is illegal or not interally consistent")

############# Set up the model structure: how many state processes and which observation time series go to which state process
###########################################################
n <- dim(y)[1] #the user passes in y with time down the rows, but at top of function this is transposed because this func wants time in the cols
m = max(whichPop)  # this is the number of state processes
numYrs <- dim(y)[2]

# Construct the Z matrix based on n, m, and whichPop; this relates observation time series to state processes
Z = matrix(0,n,m) # this will be filled 0s and 1s...if each site is unique, Z = diag(n)
for(i in 1:m) Z[which(whichPop==i),i] <- 1

# Set up the grouping structure among different state processes and the different observation time se
U.numGroups <- max(U.groups)
Q.numGroups <- max(Q.groups)
R.numGroups <- max(R.groups)

# Create matrices of 0s and 1s for each, to later be used for multiplication   
ZU = matrix(0,m,U.numGroups)   # matrix to allow shared growth rates
for(i in 1:U.numGroups) ZU[which(U.groups == i),i] <- 1
ZQ <- matrix(0,m,Q.numGroups)  # matrix to allow shared process variances
for(i in 1:Q.numGroups) ZQ[which(Q.groups==i),i] <- 1 
ZR <- matrix(0,n,R.numGroups)  # matrix to allow shared measurement errs
for(i in 1:R.numGroups) ZR[which(R.groups==i),i] <- 1 

# Print out the model structure as a check for the user
if(!silent) {
  modelStruc = paste("Model Structure is\n","m: ",m," state process(es)\n","n: ",n," observation time series\n",sep="")
  modelStruc = paste(modelStruc,"whichPop: Observation time series assigned to state processes as ",paste(whichPop,collapse=" "),"\n",sep="")
  if(m > 1) modelStruc = paste(modelStruc,"U.groups: State process growth rates assigned to groups as ",paste(U.groups,collapse=" "),"\n",sep="")
  if(m > 1 & varcov.Q == "unconstrained")
    modelStruc = paste(modelStruc,"Q: Process errors have an unconstrained m x m variance-covariance matrix\n",sep="")
  if(m > 1 & varcov.Q == "equalvarcov")
    modelStruc = paste(modelStruc,"Q: Process errors have an m x m variance-covariance matrix with equal variances and equal covariances\n",sep="")
  if(m > 1 & varcov.Q == "diagonal"){
    modelStruc = paste(modelStruc,"Q: Process errors are uncorrelated and have a diagonal var-cov matrix.\n",sep="")
    modelStruc = paste(modelStruc,"Q.groups: State process variances assigned to groups as ",paste(Q.groups,collapse=" "),"\n",sep="")
    }
  if(varcov.R == "unconstrained")
    modelStruc = paste(modelStruc,"R: Observation errors have an unconstrained n x n variance-covariance matrix\n",sep="")
  if(varcov.R == "equalvarcov")
    modelStruc = paste(modelStruc,"R: Observation errors have an n x n variance-covariance matrix with equal variances and equal covariances\n",sep="")
  if(varcov.R == "diagonal"){
    modelStruc = paste(modelStruc,"R: Observation errors are uncorrelated and have a diagonal var-cov matrix.\n",sep="")
    modelStruc = paste(modelStruc,"R.groups: Observation variances assigned to groups as ",paste(R.groups,collapse=" "),"\n",sep="")
    }
  cat(modelStruc)
}

#############Set up some matrices used for handling missing values
# include is a n x m matrix to handle missing values; 1 matrix for each year;
# if a data point in time series j in yr i is missing, element j in the diagonal of the include matrix for year i is set to 0
###########################################################
include <- array(0, dim=c(n,m,numYrs))
for(i in 1:numYrs) 
   include[,,i] <- makediag(ifelse(y[,i]!=-99,1,0),nrow=n)%*%Z  #include = 1 when data exists, 0 otherwise
miss <- ifelse(y!=-99,0,1)     # 0=observed, 1=missing
notmiss <- ifelse(y!=-99,1,0)     # 1=observed, 0=missing
y[which(y == -99)] <- 0   # Clear out -99s in data; include has been set up so that these values will be ignored

#############Create record of each variable over all iterations
########################################################### 
iter.record <- list(U = array(0,dim=c(m,max.iter)), Q = array(0,dim=c(m,m,max.iter)), 
  A = array(0,dim=c(n,max.iter)), R = array(0,dim=c(n,n,max.iter)), loglike = array(0, dim=c(1, max.iter)) )

#############Start the EM algorithm which is going to keep updating until
# it converges
###########################################################
#Set the first (or initial) values for the parameters
U= array(Uinit, dim=c(m,1)) 
Q=makediag(Qinit,nrow=m) 
A= array(Ainit,dim=c(n,1))
R=makediag(Rinit,nrow=n) #again we should be estimating this
x00=array(x00,dim=c(m,1))
init.x00 = x00 
V00 <- makediag(V00init,nrow=m); #initial variance of initx;
B = makediag(1,m)   # Interaction matrix: If estimated it will change from identity, otherwise remain constant
initY1 = 0
for(i in 1:n) {
   # do a simple linear regression, estimating the predicted value
   y.obs = y[i,]
   y.obs[which(y.obs==0)] = NA
   initY1[i] = lm(y.obs~seq(1:numYrs))$fitted.values[1]
}
bestLL = -1.0e10 #this is only used if MonteCarloInit == TRUE

if(MonteCarloInit == FALSE) {
   # use values passed in by user
   numInits = 0
   numInitSteps = 0
}
# EW: the first numInits iterations of this loop represent the MCinitialization phase (if MCInit=T),
# the last iteration is the final estimation (if MCInit=F, then goes straight to the last)
for(loop in 1:(numInits + 1)) {
   #reset for each loop
   loglike.old <- -10000
   cvg <- 1+tol

   # this determines # of iterations, if in MCInit, then iter = numInitSteps otherwise max.iter
   numIter = ifelse(loop <= numInits,numInitSteps,max.iter)   
   if(MonteCarloInit == TRUE) { # draw random initial conditions / parameter values
       U = array(runif(m,U.bounds[1],U.bounds[2]),dim=c(m,1))
       Q = makediag(exp(runif(m,logQ.bounds[1],logQ.bounds[2])),nrow=m)
       R = makediag(exp(runif(n,logR.bounds[1],logR.bounds[2])),nrow=n)
       init.x00 = array(runif(m,0.75*initY1,1.25*initY1),dim=c(m,1))
       x00 = init.x00
   }
   # Some initial start draws of the params are so awful that loglike will go down.  We want to discard these cases.
   validDraw = TRUE

   # If we've exceeded the initialization phase, use the best params for starting final estimation
   if(loop > numInits & MonteCarloInit == TRUE) {
      Q = best.Q
      R = best.R
      U = best.U
      x00 = best.x00
   }

   for(iter in 1:numIter) { 
      kf <- Kfilter(m,n,numYrs,U,Q,A,R,B,x00,V00,include,y) #call kalman filter to get x(t)|y(1:T) etc estimates
      loglike.new = kf$loglik
     
      if(iter>1 && is.finite(loglike.old) == T && is.finite(loglike.new) == T) cvg = loglike.new - loglike.old  ## cvg is improvement in likelihood
   	# If loglike goes down, set validDraw to FALSE
      if(cvg<0) validDraw = FALSE
      else validDraw = TRUE

    	# if converged, break out
	if(cvg >= 0 & cvg < tol) break
      
	# Store loglike for comparison to new one after parameters are updated
	loglike.old = loglike.new
    
    	################# M STEP Re-estimate U,Q,A,R,B,X00,V00 via ML given x(t) estimate
    	################
    	# Get new A subject to its constraints (update of R will use this)
    	# See notes by EH for derivation
    	##############################################################
    	A.last.iter <- A     
      A=array(0,dim=c(n,1));  #A will be zero except when there are multiple observation time series for a single state process
      # any state process with multiple observation time series will have 1st A set to 0 and rest estimated
      tableA <- as.numeric(table(whichPop))  #figure out which state processes have more than 1 observation time series
      numSubsWith2Sites = which(tableA > 1)
      if(length(numSubsWith2Sites) > 0) {  # added by EW 07/19 to catch the case where A not estimated
          for(subpops in 1:length(numSubsWith2Sites)) {  # loop over all state processes with more than 1 observation time series
           	this.sub = numSubsWith2Sites[subpops]
           	this.n = tableA[this.sub]
           	this.index = which(whichPop==this.sub)
            sum1 <- 0
           	for (i in 1:numYrs) {
            	include.Y <- include[this.index,this.sub,i]   # include is the matrix of 0s/1s for all years, include.Y is the matrix for a single year
            	dim(include.Y) <- c(this.n,1)             #this is just for one state process so nx1 
            	A.if.y.missing = makediag(miss[this.index,i],nrow=this.n)%*%A.last.iter[this.index]   #this is going to give me 0 if have val and old A if not
            	A.if.y.present = array(y[this.index,i],dim=c(this.n,1))-include.Y %*% kf$xtT[this.sub,i]
            	sum1 <- sum1 + A.if.y.present + A.if.y.missing
            	}	 #end for numYrs loop
        		As = sum1/numYrs
            As = As - As[1]
            A[this.index] = As
        	} # end for subpop loop             
      } # end if
     	################
    	# Get new U subject to its constraints (update of Q will use this)
    	# See notes by EH and Rich Hinrichsen for derivation
    	################################################################
      # if m=1 or m=n this is going to reduce to U <- (kf$xtT[,numYrs]-kf$xtT[,1])/(numYrs-1)
    	# if some state processes share a u, then we need to take the average across processes sharing a u, taking into account the variance of each process
      Qinv = chol2inv(chol(Q))    # this is calculated here because used twice below      
      Qinv = (Qinv+t(Qinv))/2     #enforce symmetry
      numer = t(ZU)%*%Qinv%*%(kf$xtT[,numYrs]-kf$xtT[,1])
      denom = 1/takediag((t(ZU)%*%Qinv%*%ZU)*(numYrs-1))
      U = c(ZU%*%(numer*denom))
    
    	################
    	# Get new R subject to its constraints (updated  07.09.08 by EH to fix R est bug and by EW to add grouping)
    	# S&S 4.77 with addition of grouping and diagonal constraint variants
    	################################################################
    	sum1 <- 0
    	for (i in 1:numYrs) {
        include.Y <- include[,,i]   # include is the matrix of 0s/1s for all years, include.Y is the matrix for a single year
        dim(include.Y) <- c(n,m)             #stops R from changes the dimensions when m=1 
        include.Y2 <- array(notmiss[,i],dim=c(n,1))
        # this is the updating equation for R
        # EH 7.10.08 THIS IS THE PART THAT IS PREVENTING ESTIMATION OF R COVARIANCES WHEN THERE ARE MISSING VALUES
        R.if.y.missing <- makediag(miss[,i],nrow=n)%*%R   #this is going to give me 0 if have val and R from last iteration if not
        err <- y[,i]-(include.Y %*%kf$xtT[,i]+include.Y2*A)  #residuals: this will be zero when y[j] is missing]
        R.if.y.present <- err%*%t(err) + include.Y %*% kf$VtT[,,i] %*% t(include.Y) #updated R estimate if have y data
        sum1 <- sum1 + R.if.y.present + R.if.y.missing 
    	} #end for loop
    	sum1=(sum1+t(sum1))/2 #enforce symmetry;
    	R_unconstrained = sum1/numYrs #this provides the estimate of the R matrix with diagonal and non-diagonal elements
      #The inv functions don't work well when the diagonal elements get lower than the machine tol, so put a min on those
    	#this could break if R_unconstrained happens to equal an integer
	    if(length(R_unconstrained)==1)
		     R_unconstrained <- max(sqrt(.Machine$double.eps),R_unconstrained)
	    else
		     diag(R_unconstrained)[which(takediag(R_unconstrained) 1) #this will get the average to use for the shared covariance value
            Roffdiag <- (1/(n*(n-1)))*(sum(R_unconstrained)-sum(takediag(R_unconstrained)))
         Roffdiag <- Roffdiag*(matrix(1,n,n)-makediag(1,n))
         R <- Rdiag + Roffdiag
    	} #if R is equal var and equal cov
    
    	################
    	# Get new Q subject to its constraints (updated  07.09.08 by by EW to add grouping)
    	# This is a little different than S&S Eqn 4.76; theirs is based on the likelihood
    	# as written in Eqn 4.69 & and Harvey 4.2.19
    	# We don't have a prior on V00 and what you set that at affects the Q M-step calculation
    	# to its detriment.  
    	# Instead I'm using the likelihood calculation from Ghahramani and Hinton which is does 
    	# the sum for the Q bit from 2 to T not 1 to T; see notes by EH
    	# S&S and Harvey would start here
    	# S00 <- kf$V0T + kf$x0T%*%t(kf$x0T)
      # S11 <- kf$VtT[,,1] + (kf$xtT[,1]-U)%*%t(kf$xtT[,1]-U)
      # S10 <- kf$Vtt1T[,,1] + (kf$xtT[,1]-U)%*%t(kf$x0T);
      # I switch on 7/22/08 to this since it seems less sensitive to V00 and finds Q with max L with lower tol setting
      # with S&S it climbs Q at more slowly and cvg hits tol before max is reached
    	################################################################
      S00 = 0
      S11 = 0
      S10 = 0
      for (i in 2:numYrs) {
        S00 <- S00 + (kf$VtT[,,i-1] + kf$xtT[,i-1]%*%t(kf$xtT[,i-1]));
        S10 <- S10 + (kf$Vtt1T[,,i]+(kf$xtT[,i]-U)%*%t(kf$xtT[,i-1]));
        S11 <- S11 + (kf$VtT[,,i] + (kf$xtT[,i]-U)%*%t(kf$xtT[,i]-U));
      }
      Q_unconstrained = (S11 - (S10 + t(S10)) + S00)/(numYrs-1);    #numYrs - 1 since summing 2 to T
      # Calculate the interaction matrix, if requested
      if(estInteractions==TRUE) B = S10%*%chol2inv(chol(S00))
      #The inv functions don't work well when the diagonal elements get lower than the machine tol, so put a min on those
	    if(length(Q_unconstrained)==1)
	       Q_unconstrained <- max(sqrt(.Machine$double.eps),Q_unconstrained)
	    else
	       diag(Q_unconstrained)[which(diag(Q_unconstrained) 1) 
            Qoffdiag <- (1/(m*(m-1)))*(sum(Q_unconstrained)-sum(takediag(Q_unconstrained)))
         Qoffdiag <- Qoffdiag*(matrix(1,m,m)-makediag(1,m))
         Q <- Qdiag + Qoffdiag;
      }   #if equal variances and covariances
    
    	################
    	# EH: Get new x00, V00 cannot be estimated so it is fixed
    	# S&S Eqn 4.78
    	################################################################
      x00 <- c(kf$x0T)
    
    	################
    	# Keep a record of the iterations; this is to allow user to debug if they are having problems with convergence
    	################################################################
      iter.record$R[,,iter] <- R   
      iter.record$U[,iter]  <- U
      iter.record$Q[,,iter] <- Q
      iter.record$A[,iter] <- A
      iter.record$loglike[iter] <- loglike.new
   }  # end inner iter loop

   # If doing a MCInit, check whether the likelihood is the best observed
   # Only use bootstrap param draws where loglike did not go down during numInitSteps
   if(validDraw == TRUE & MonteCarloInit == TRUE & loglike.new > bestLL) {
      # update the best initial parameter estimates
      best.Q = iter.record$Q[,,1]
      best.R = iter.record$R[,,1]
      best.U = iter.record$U[,1]
      best.x00 = init.x00
      bestLL = loglike.new
   }
   
}   # end iter loop that is running until the EM algorithm converges

# Consolidate arrays for output
iter.record$R <- iter.record$R[,,1:(iter-1)]   
iter.record$U <- iter.record$U[,1:(iter-1)]
iter.record$Q <- iter.record$Q[,,1:(iter-1)]
iter.record$A <- iter.record$A[,1:(iter-1)]
iter.record$loglike <- iter.record$loglike[1:(iter-1)]
if(!silent){
	cat(paste("Finished in ",iter," interations. Max.iter was ",max.iter,".\n",sep=""))
}
# confidence intervals based on state std errors, see caption of Fig 6.3 (p337) Shumway & Stoffer
if(m==1) se = kf$VtT[,,1:numYrs]
if(m > 1) {
   se = matrix(0,nrow=m,ncol=numYrs)
   for(i in 1:numYrs) se[,i] = t(sqrt(takediag(kf$VtT[,,i])))
}

names(U) = U.names

if(is.matrix(Q)==F) Q = as.matrix(Q) # this line added to make sure that a scalar isn't passed to rmvnorm - 01/20/09

rownames(Q) = Q.names
colnames(Q) = Q.names
rownames(R) = R.names
colnames(R) = R.names
states = kf$xtT
return(list(states=states,CI_states.lower = (kf$xtT - 1.96*se), CI_states.upper = (kf$xtT + 1.96*se), A=A,B=B,Q=Q,R=R,U=U,Z=Z,x00=x00,V00=V00,V0T=kf$V0T,Kt=kf$Kt,Innov=kf$Innov,Sigma=kf$Sigma,m=m, n=n, numYrs=numYrs,iter=iter,loglike=loglike.new,iter.record=iter.record, whichPop=whichPop, R.groups=R.groups, Q.groups=Q.groups, U.groups=U.groups, varcov.R=varcov.R, varcov.Q=varcov.Q, data=t(y), include=include))

}  #End of the KalmanEM function


Kfilter = function(m,n,numYrs,U,Q,A,R,Phi,x00,V00,include,y) {
     # EH: This is running a Kalman-Raush filter (forward + backward) to get (written univariate to make it look cleaner):
     # xtT: E(x(t) | y(1:T)) , VtT: Var(x(t)*x(t) | y(1:T)), Vtt1T: Var(x(t)*x(t-1) | y(1:T))
     # EJW: This code is a hybrid from Shumway and Stoffer (2006) and Matlab
     # code originally written by Eli Holmes for multi-state state-space
     # estimation.  The code originally used solve() for matrix inverses - 
     # EW changed this to cholesky decomp though for speed.
     # ** All eqn refs are to 2nd ed of Shumway & Stoffer (2006): Time Series Analysis and Its Applications
     
    #initialize - these are for the forward, Kalman, filter
    # for notation purposes, 't' represents current point in time, 'T' represents the length of the series
    Vtt <- array(0,dim=c(m,m,numYrs))     # Analagous to S&S Ptt, E[Vtt|y(1:t)]
    Vtt1 <- array(0,dim=c(m,m,numYrs))    # Analagous to S&S Ptt1, E[Vtt1|y(1:t)]
    xtt <- array(0,dim=c(m,numYrs))       # E[x(t) | Y(t)]
    xtt1 <- array(0,dim=c(m,numYrs))      # E[x(t) | Y(t-1)]
    innov <- array(0,dim=c(n,numYrs))     # these are innovations, parentheses in 6.21 
    vt <- array(0,dim=c(n,numYrs))        # used for likelihood, vt equivalent to epsilon, eqn 6.62
    Ft <- array(0,dim=c(n,n,numYrs))      # used for likelihood, Ft equivalent to sigma matrix eqn 6.62
    # these are for the backwards, Rausch, filter
    VtT <- array(0,dim=c(m,m,numYrs))     # E[Vtt|y(1:T)]
    J <- array(0,dim=c(m,m,numYrs))       # see eqn 6.49
    Vtt1T <- array(0,dim=c(m,m,numYrs))   # E[Vtt1|y(1:T)]
    xtT <- array(0,dim=c(m,numYrs))       # E[x | y(1:T)]
    Kt <- array(0, dim=c(m,n,numYrs))     # 3D matrix of Kalman gain, EW added 11/14/08
    
    #forward pass gets you E[x(t) given y(1:t)]
    xtt1[,1] <- Phi%*%c(x00) + U        # eqn 6.19
    Vtt1[,,1] <- Phi%*%V00%*%t(Phi) + Q          # eqn 6.20
    include.Y <- include[,,1]     # used for missing data
    dim(include.Y) <- c(n,m)  #stops R from changes the dimensions when m=1 
    include.Y2 <- (include.Y%*%array(1,dim=c(m,1))) # EH: create include.Y for A that is nx1
    siginv = include.Y%*%Vtt1[,,1]%*%t(include.Y)+R    # bracketed piece of eqn 6.23
    siginv=chol2inv(chol(siginv))         # now siginv is sig[[i]]^{-1}       
    siginv = (t(siginv)+siginv)/2 #Vtt1 happens to be symmetric since it is V00+Q; although in general E(xt t(xt1)) is not symmetric
    Kt[,,1] <- Vtt1[,,1]%*%t(include.Y) %*% siginv;    #broke siginv to impose symmetry, eqn 6.23

    innov[,1] <- y[,1] - (include.Y%*%xtt1[,1] + include.Y2*A)
    xtt[,1] <- xtt1[,1] + Kt[,,1]%*%innov[,1];   # eqn 6.21
    Vtt[,,1] <- Vtt1[,,1]-Kt[,,1]%*%include.Y%*%Vtt1[,,1];      # eqn 6.22, detail after 6.28
    Vtt[,,1] <- (Vtt[,,1]+t(Vtt[,,1]))/2;       #Vtt must be symmetric
    # the missing values will contribute 0.0 for this calc
    vt[,1] <- y[,1]-(include.Y%*%xtt1[,1]+include.Y2*A) #3.3.4; need to hold on to this for loglik calc
    Ft[,,1] <- include.Y%*%Vtt1[,,1]%*%t(include.Y)+R #need to hold on to this for loglik calc
    Ft[,,1] <- (Ft[,,1]+t(Ft[,,1]))/2; #to ensure its symetric

    for (t in 2:numYrs) {
       xtt1[,t] <- Phi%*%xtt[,t-1] + U  #xtt1 denotes x_t^(t-1), eqn 6.19
       Vtt1[,,t] <- Phi%*%Vtt[,,t-1]%*%t(Phi) + Q                  # eqn 6.20
       Vtt1[,,t] <- (Vtt1[,,t]+t(Vtt1[,,t]))/2   #in general Vtt1 is not symmetric but here it is since Vtt and Q are
       include.Y <- include[,,t]  
       dim(include.Y) <- c(n,m)  #stops R from changes the dimensions when m=1  
	     include.Y2 <- (include.Y%*%array(1,dim=c(m,1))) # I'm futzing with the include.Y for A to make it nx1
       Kt[,,t] <- Vtt1[,,t]%*%t(include.Y) %*% chol2inv(chol(include.Y%*%Vtt1[,,t]%*%t(include.Y)+R))   # eqn 6.23
       innov[,t] <- y[,t] - (include.Y%*%xtt1[,t] + include.Y2*A)
       xtt[,t] <- xtt1[,t] + Kt[,,t]%*%innov[,t]   # eqn 6.21
       Vtt[,,t] <- Vtt1[,,t]-Kt[,,t]%*%include.Y%*%Vtt1[,,t]                             # eqn 6.22, detail after 6.28
       Vtt[,,t] <- (Vtt[,,t]+t(Vtt[,,t]))/2 #to ensure its symetric
       # missing values will contribute nothing
       vt[,t] <- y[,t]-(include.Y%*%xtt1[,t]+include.Y2*A) #need to hold on to this for loglik calc
       Ft[,,t] <- include.Y%*%Vtt1[,,t]%*%t(include.Y)+R #need to hold on to this for loglik calc
       Ft[,,t] <- (Ft[,,t]+t(Ft[,,t]))/2 #to ensure its symetric
    }
    KT <- Kt[,,t];

    #indexing is 0 to T for the backwards (Rausch) recursions
    #backward pass gets you E[x(t)|y(1:T)] from E[x(t)|y(1:t)]
    xtT[,numYrs] <- xtt[,numYrs]  
    VtT[,,numYrs] <- Vtt[,,numYrs]
    s <- seq(numYrs,2)
    for(i in 1:(numYrs-1)) {
        yr <- s[i]   #equivalent to T:-1:0  
        Vinv <- chol2inv(chol(Vtt1[,,yr]))
        Vinv <- (Vinv + t(Vinv))/2 #to enforce symmetry after chol2inv call
        J[,,yr-1] <- Vtt[,,yr-1]%*%t(Phi)%*%Vinv     # eqn 6.49
        xtT[,yr-1] <- xtt[,yr-1] + J[,,yr-1]%*%(xtT[,yr]-xtt1[,yr])     # eqn 6.47
        VtT[,,yr-1] <- Vtt[,,yr-1] + J[,,yr-1]%*%(VtT[,,yr]-Vtt1[,,yr])%*%t(J[,,yr-1])  # eqn 6.48
        VtT[,,yr-1] <- (VtT[,,yr-1]+t(VtT[,,yr-1]))/2     #VtT is symmetric
    }
    Vinv <- chol2inv(chol(Vtt1[,,1]))
    Vinv <- (Vinv + t(Vinv))/2 #to enforce symmetry after chol2inv call
    J0 <- V00%*%t(Phi)%*%Vinv                      # eqn 6.49
    x0T <- x00 + J0%*%(xtT[,1]-xtt1[,1]);          # eqn 6.47
    V0T <- V00 + J0%*%(VtT[,,1]-Vtt1[,,1])*t(J0)   # eqn 6.48
    V0T <- (V0T+t(V0T))/2;

    #run another backward recursion to get E[x(t)x(t-1)|y(T)]
    include.Y <- include[,,numYrs]  
    dim(include.Y) <- c(n,m) #stops R from changes the dimensions when m=1  
    Vtt1T[,,numYrs] <- (makediag(1,m) - KT%*%include.Y)%*%Vtt[,,numYrs-1] #this is Var(x(T)x(T-1)|y(T))
    s <- seq(numYrs,3)
    for (i in 1:(numYrs-2)) {
       yr <- s[i]
       Vtt1T[,,yr-1] <- Vtt[,,yr-1]%*%t(J[,,yr-2]) + J[,,yr-1]%*%(Vtt1T[,,yr]-Vtt[,,yr-1])%*%t(J[,,yr-2])
    }
    Vtt1T[,,1] <- Vtt[,,1]%*%t(J0) + J[,,1]%*%(Vtt1T[,,2]-Vtt[,,1])%*%t(J0)

    #Calculate log likelihood, see eqn 6.62
    loglik <- -n*(numYrs/2)*log(2*pi)
    for (i in 1:numYrs) {
        if(length(Ft[,,i])==1) detFt <- Ft[,,i] else detFt <- det(Ft[,,i])
        Ftinv <- chol2inv(chol(Ft[,,i]))
        Ftinv <- (Ftinv +t(Ftinv))/2 #enforce symmetry; Ft is symmetric
        loglik <- loglik - (1/2)%*%t(vt[,i]) %*% Ftinv %*% vt[,i] - (1/2)*log(detFt);
    }
    return(list(xtT = xtT, VtT = VtT, Vtt1T = Vtt1T, x0T = x0T, V0T = V0T, loglik = loglik, Vtt = Vtt, Vtt1 = Vtt1, J=J, Kt=Kt, xtt1 = xtt1, Innov=innov, Sigma=Ft))
}


FailedErrorCheck = function(y, whichPop, U.groups, R.groups, Q.groups, varcov.Q, varcov.R,Uinit,Qinit,Ainit,Rinit,x00) {
############# Error check to make sure user isn't trying to set up an illegal or inconsistent model structure

#################### Check whichPop
n = dim(y)[1] #each row is obs ts
if(length(whichPop) != n) {
	cat("Error: whichPop is not in the correct form; it should be 1xn or nx1; \n"); return(TRUE) }
m = max(whichPop)  # this is the number of state processes
if( FALSE %in% ( (1:m) %in% whichPop ) ) {
	cat("Error: Something is wrong with whichPop.  You need an observation time series for each state process. \n"); return(TRUE) } 

#################### Check varcov.Q
if( (varcov.Q %in% list("unconstrained", "diagonal", "equalvarcov"))==FALSE ) {
	cat("Error: options for varcov.Q are scalar, unconstrained, diagonal or equalvarcov (passed as text in quotes) \n" ); return(TRUE) }

#################### Check varcov.R
if( (varcov.R %in% list("unconstrained", "diagonal", "equalvarcov"))==FALSE ) {
	cat("Error: options for varcov.R are unconstrained, diagonal or equalvarcov (passed as text in quotes) \n" ); return(TRUE) }
if( (-99 %in% y) & varcov.R != "diagonal" ) {
	cat("Sorry! The current code cannot estimate observation error covariances when there are missing data; set varcov.R to diagonal \n"); return(TRUE) }

#################### Check Q.groups
if(length(Q.groups) != m) {
	cat("Error: There is a mismatch between the number of state processes and length of Q.groups \n"); return(TRUE) }
not.all.m.in.groups = FALSE %in% ( (1:m) %in% Q.groups )
if( not.all.m.in.groups & varcov.Q != "diagonal" ) {
	cat("Sorry! The code cannot apply groupings of Qs except to diagonal Q matrices \n"); return(TRUE) }
# Check that each state process is assigned to a Q group
if(FALSE %in% ((1:max(Q.groups)) %in% Q.groups) ) {
	cat(paste("Error: something is wrong with Q.groups You have ",max(Q.groups)," Q groups but not all appear in Q.groups \n",sep="")); return(TRUE) }

#################### Check U.groups
if(length(U.groups) != m) {
	cat(paste("Error: U.groups should be m x 1 or 1 x m","Model is m = ",m," as currently set up.\n",sep="")); return(TRUE) }
if(FALSE %in% ((1:max(U.groups)) %in% U.groups) ) {
	cat(paste("Error: something is wrong with U.groups. You have ",max(U.groups)," U groups but not all appear in U.groups. \n",sep="")); return(TRUE) }

#################### Check R.groups
if(length(R.groups) != n) {
	cat("Error: There is a mismatch between the number of measurement time series and length of R.groups. \n"); return(TRUE) }
not.all.n.in.groups = FALSE %in% ( (1:n) %in% R.groups )
if( not.all.n.in.groups & varcov.R != "diagonal" ) {
	cat("Sorry! The code cannot apply R.groups except to diagonal R matrices; set varcov.R to diagonal. \n"); return(TRUE) }
if(FALSE %in% ((1:max(R.groups) ) %in% R.groups) ) {
	cat(paste("Error: something is wrong with R.groups. You have ",max(R.groups)," R groups but not all appear in R.groups.\n",sep="")); return(TRUE) }

############# Check that the initial values are correctly sized
###########################################################
if(length(Uinit) != m) {
	cat("Error: Uinit should be m x 1 or 1 x m; Either your Uinit is wrong or your flags are setting m to something you are not expecting.\n"); return(TRUE) }
if(length(Qinit) != m) {
	cat("Error: Qinit should be m x 1 or 1 x m; Either your Qinit is wrong or your flags are setting m to something you are not expecting.\n"); return(TRUE) }
if(length(Ainit) != n) {
	cat("Error:L Ainit should be n x 1 or 1 x n; Your Ainit is the wrong size.\n"); return(TRUE) }
if(length(Rinit) != n) {
	cat("Error: Rinit should be n x 1 or 1 x n; Your Rinit is the wrong size.\n"); return(TRUE) }
if(length(x00) != m) {
	cat("Error: x00 should be m x 1 or 1 x m; Either your x00 is wrong or your flags are setting m to something you are not expecting.\n"); return(TRUE) }

return(FALSE)
}

takediag = function(x)
{
if(length(x)==1) return(x)
else return(diag(x))
}

makediag = function(x,nrow=NA)
{
if(length(x)==1) 
{
if(is.na(nrow)) nrow=1
return(diag(c(x),nrow))
}
if((is.matrix(x) | is.array(x)))
   if(!(dim(x)[1]==1 | dim(x)[2]==1))  stop("Error in call to makediag; x is not vector")
if(is.na(nrow)) nrow=length(x)
return(diag(c(x),nrow))
}


#######################################################################################################
#   These functions are all for bootstrapping
#######################################################################################################
bootstrapMSSM = function(nBoot, model, maxIter = 5000, tol=0.01, onlyAIC = TRUE, silent=FALSE) {
  # This function updated by EW and EH
  # all equation numbers refer to Chapter 6, Shumway & Stoffer
  # onlyAIC is a debug feature to allows all the boot params and boot data iterates to be returned
  # silent is a flag to indicate whether the progress bar should be printed
  
  ###### Check that package mvtnorm is installed and load it if it isn't already loaded
  if(require(mvtnorm)==FALSE) {
     str1 = "The bootstrapping functions require the package mvtnorm and you haven't installed it yet.\n"
     str2 = "Install the package before continuing by\n"
     str3 = "going to the Packages tab and selecting install (if working from the R GUI).\n"
     cat(paste(str1,str2,str3,sep=""))
     stop("the mvtnorm package is not installed")
  }

  errors = t(stdInnov(model$Sigma, model$Innov))  # standardize innovations
  numStates = model$m      # number of subpopulations
  numYears = model$numYrs  # length of time series
  numSites = model$n       # number of time series

  # Calculate the sqrt of sigma matrices, so they don't have to be computed 5000+ times
  sigma.Sqrt = array(0,dim=c(numSites, numSites, numYears))
  BKS = array(0,dim=c(numStates, numSites, numYears)) 
  for(i in 1:numYears) {
     eig = eigen(model$Sigma[,,i])   # calculate inv sqrt(sigma[1])
     # EW added as.matrix 01/26/09
     sigma.Sqrt[,,i] = eig$vectors %*% makediag((sqrt(eig$values))) %*% t(eig$vectors)
     BKS[,,i] = model$B %*% model$Kt[,,i] %*% sigma.Sqrt[,,i]   # this is m x n
  }
  U=NA; Q=NA; R=NA; A=NA; B=NA; states=NA; boot.data=NA;
  # these are the arrays for output
  if(onlyAIC == FALSE) {
    boot.data = array(0,dim=c(dim(as.matrix(model$data)),nBoot))
    U = array(0,dim=c(dim(as.matrix(model$U)),nBoot))
    Q = array(0,dim=c(dim(as.matrix(model$Q)),nBoot))
    R = array(0,dim=c(dim(as.matrix(model$R)),nBoot))
    A = array(0,dim=c(dim(as.matrix(model$A)),nBoot))
    B = array(0,dim=c(dim(as.matrix(model$B)),nBoot))
    states = array(0,dim=c(dim(as.matrix(model$states)),nBoot))
  }
  
  logL.thetaStar = 0
  logL = 0;
  drawProgressBar = FALSE #If not time library, no prog bar
  if(require(time)==TRUE) { #then we can draw a progress bar
    prev = progressBar()
    drawProgressBar = TRUE
    }
  for(i in 1:nBoot) {
    newData = makeNewData(errors, model, model$Sigma, sigma.Sqrt, BKS)  # make new data
    # Take the ML output and make sure it's in the right form to be passed back in 
    Rstart = diag(model$R)
    Qstart = diag(model$Q)
    # Step 2: estimate the MLEs, theta*
    boot.model = KalmanEM(newData, varcov.Q=model$varcov.Q, varcov.R=model$varcov.R, whichPop=model$whichPop,U.groups=model$U.groups, Q.groups=model$Q.groups,R.groups=model$R.groups,Uinit=model$U,Ainit=model$A,Qinit=Qstart,Rinit=Rstart,tol=tol,silent=T,max.iter=maxIter)
    if(onlyAIC == FALSE) {
    # Step 3: store the parameter estimates from the bootstraped data fitting
	    boot.data[,,i] = as.matrix(newData)
      U[,,i] = as.matrix(boot.model$U)
      Q[,,i] = as.matrix(boot.model$Q)
      R[,,i] = as.matrix(boot.model$R)
      A[,,i] = as.matrix(boot.model$A)
      B[,,i] = as.matrix(boot.model$B)                  
      states[,,i] = as.matrix(boot.model$states)
    }
    logL[i] = Kfilter(numStates, numSites, numYears, boot.model$U, boot.model$Q, boot.model$A, boot.model$R, boot.model$B, boot.model$x00, boot.model$V00, boot.model$include, t(model$data))$loglik 
    if(drawProgressBar) prev <- progressBar(i/nBoot,prev)
  }
  
  AICb = -4*(sum(logL))/nBoot + 2*model$loglike
  return(list(AICb = AICb, logL.star=logL, U = U, Q = Q, R = R, A = A, B = B, states = states, boot.data=boot.data))
}

makeNewData = function(Errors, model, Sigma, sigmaSqrt, B.Kt.sqrtSigma) {
   # This function updated by EW and EH Dec 02, 2008
   # Errors = std innovations
   origData = model$data

   newData = matrix(-99, model$numYrs, model$n)
   newStates = matrix(-99, model$numYrs+1, model$m) # States = years x subpops
   numMissingVals = length(which(Errors==0))

   if(numMissingVals == 0) {
      # Then the bootstrap algorithm proceeds
      # Stoffer & Wall suggest not sampling from innovations 1-3 (not much data)
      minIndx = ifelse(model$numYrs > 5, 4, 1)
      samp = sample(seq(minIndx, model$numYrs), size = model$numYrs, replace = T)
      # EW added as.matrix() on 01/26/09
      e = as.matrix(Errors[samp,])
      # newStates is a[] in the writeup by EH
      newStates[1,] = model$x00
      #newStates[2,] = model$B %*% model$x00 + model$U + model$B %*% model$Kt[,,1] %*% sigmaSqrt[,,1] %*% e[1,]
      newStates[2,] = model$B %*% model$x00 + model$U + B.Kt.sqrtSigma[,,1] %*% e[1,]
      newData[1,] = model$x00[model$whichPop] + model$A + sigmaSqrt[,,1] %*% e[1,]  # eqn 1.7 in writeup by EH
      for(i in 2:model$numYrs) {  # Deal with years 2:T, use sigma, Kt, B, U, A
         newStates[i+1,] = model$B %*% newStates[i,] + model$U + B.Kt.sqrtSigma[,,i] %*% e[i,]
         newData[i,] = newStates[i,][model$whichPop] + model$A + sigmaSqrt[,,i] %*% e[i,]
      }
      newData[which(e == 0)]=-99  #newData is going to be used in the KalmanEM function which expects -99 for missing values
      # return the new data      
   }
   if(numMissingVals != 0) {
      newStates[1,] = model$x00 # t = 0
      # create a matrix of RVs for observation error
      obs.error = rmvnorm(model$numYrs, mean = rep(0, nrow(as.matrix(model$R))), sigma = model$R, method="chol")
      pro.error = rmvnorm(model$numYrs, mean = rep(0, nrow(model$Q)), sigma = model$Q, method="chol")
      for(i in 2:(model$numYrs+1)) {
         newStates[i,] = model$B %*% newStates[i-1,] + model$U + pro.error[i-1,]
         newData[i-1,] = model$Z %*% newStates[i,] + model$A + obs.error[i-1,]
      }
      newData[which(Errors==0)]=-99
   }

   return(newData)
}

stdInnov = function(SIGMA, INNOV) {
   # This function added by EW Nov 3, 2008
   # This function is only called once by bootstrapMSSM()
   # SIGMA is covariance matrix, E are original innovations
   numYrs = dim(INNOV)[2]
   numSites = dim(INNOV)[1]
   SI = matrix(0, numSites, numYrs)
   for(i in 1:numYrs) {
      a = SIGMA[,,i]
      a.eig = eigen(a)
      # EW added as.matrix() on 01/26/09 for single trajectory model
      a.sqrt = a.eig$vectors %*% makediag((sqrt(a.eig$values))) %*% solve(a.eig$vectors)
      SI[,i] = chol2inv(chol(a.sqrt)) %*% INNOV[,i]  # eqn 1, p359 S&S
   }
   SI[which(INNOV == 0)]=0   # make sure things that should be 0 are 0
   return(SI)
}

Sculpin 0.2 | xhtml | problems or comments? | report bugs