KalmanEM.r

Revision 9 - 10/8/09 at 1:04 pm by e2holmes

Previous revision
Back to revision history for KalmanEM.r
This file is part of the project Kalman-EM
KalmanEM = function(y, whichPop=NA, Binit=NA, Uinit=0, Qinit=0.05, Ainit=0, Rinit=0.05, x00=NA, V00init=10, Zinit=NA, max.iter=5000, varcov.Q="unconstrained", varcov.R="diagonal", B.constraint="identity", U.constraint="unconstrained", Z.constraint="missingvalues", A.groups=NA, U.groups=NA, Q.groups=NA, R.groups=NA, x00.groups=NA, B.groups=NA, U.bounds=c(-1,1), logQ.bounds = c(log(1.0e-05),log(1.0)), logR.bounds = c(log(1.0e-05),log(1.0)), x00prior=FALSE, estInteractions=FALSE, MonteCarloInit = FALSE, numInits = 500, numInitSteps = 10, tol=0.01, silent=FALSE, debugit=FALSE) 
{
#Version
vers = "2.69"  #EEH Oct 4, 2009
#KalmanEM written by Eli Holmes and Eric Ward
                                                                                                                                    #comment on x00prior=FALSE; x00prior=TRUE case is not programmed in yet.  In that case x00 fixed, not estimated.
#x00prior=FALSE means that we treat x00 as an unknown fixed parameter that is estimated.  So P_0=V00=0, but we don't want to set
#V00=0 because then the EM algorithm won't converge.  We want V00 big, and then at the end, return the likelihood with V00=0                                                                                                                                          
# Code written by Eric Ward (EW) and Eli Holmes (EH) 
# The Kfilter function was based was adapted from Matlab code written by Eli Holmes and Rich Hinrichsen
# This code estimates structural log-linear autoregressive models
# The model needs to specified in a log-linear form; for population count data, this means log-transforming the data
# Will deal with missing values (marked as -99), but this slows computation

############## Things that the function needs passed in
###########################################################
# y is the LOG TRANSFORMED data with -99 for MISSING VALS; each column is a separate observation time series; time is down the rox
orig.data = y
y <- t(y) #for consistency with how ecological data files are usually formatted, all data files are assumed to have time going down rows; this algorithm wants time across cols
# Initial conditions for the algorithm are passed in as vectors Uinit,Qinit,Rinit,x00
# max.iter is the number of iterations to do
# tol is the tolerence for checking if convergence has happened; lower if convergence happens in just a few iterations
# whichPop, a 1xn vector assigning observation time series to state processes
# varcov.Q and varcov.R specify the structures allowed for Q & R: "unconstrained", "diagonal" or "equalvarcov"
# R.groups is a nx1 vector specifying which observation variances are shared across different state processes; default is all unique
# A.groups is a nx1 vector specifying which observation biases are shared across different state processes; default is all unique
# U.groups is a mx1 vector specifying which u's are shared across different state processes; default is all unique
# Q.groups is a mx1 vector specifying which process variances are shared across different state processes; default is all unique

############# Check that package MASS is installed and load it if it isn't already loaded
###########################################################
if(require(MASS)==FALSE) {
  str1 = "The KalmanEM function requires the package MASS and you haven't installed it yet.\n"
	str2 = "Install the package before continuing by\n"
	str3 = "going to the Packages tab and selecting install (if working from the R GUI).\n"
      cat(paste(str1,str2,str3,sep=""))	
	stop("missing MASS package")
}
############ Check that the user didn't pass in any illegal arguments
if(FailedArgumentCheck(y, whichPop, Uinit, Qinit, Ainit, Rinit, x00, V00init, max.iter, varcov.Q, varcov.R, U.groups, Q.groups, R.groups, U.bounds, logQ.bounds, logR.bounds, x00prior, estInteractions, MonteCarloInit, numInits, numInitSteps, tol, silent, debugit))
  stop("invalid argument values passed into KalmanEM()")
  
############# Give names for the rows and columns of y.  numYrs is actually the number of time steps
###########################################################
  n = dim(y)[1] #R does something strange with dim; floor makes sure it returns an integer
  numYrs <- dim(y)[2]
  #m = max(whichPop) and is defined later (after whichPop is defined in the case that whichPop=NA was passed in)
  
############# Let the user put in names for groups; convert group names to factors
############# If NA then just returns back to NA
###########################################################
  site.names=ifelse(is.null(names(y)), seq(1,n), names(y) )
  x00.names=paste(":", x00.groups, sep="") #this will get reset if x00.groups=NA
  if(is.numeric(x00.groups)==FALSE) x00.groups = as.numeric(as.factor(x00.groups))
  U.names=paste(":", U.groups, sep="")   #this will get reset if U.groups=NA 
  if(is.numeric(U.groups)==FALSE) U.groups = as.numeric(as.factor(U.groups))
  A.names=paste(":", A.groups, sep="")     #this will get reset if A.groups=NA
  if(is.numeric(A.groups)==FALSE) A.groups = as.numeric(as.factor(A.groups))
  R.names=paste(":", R.groups, sep="")    #this will get reset if R.groups=NA
  if(is.numeric(R.groups)==FALSE) R.groups = as.numeric(as.factor(R.groups))
  Q.names=paste(":", Q.groups, sep="")      #this will get reset if Q.groups=NA
  if(is.numeric(Q.groups)==FALSE) Q.groups = as.numeric(as.factor(Q.groups))
  B.names=paste(":", B.groups, sep="")      #this will get reset if Q.groups=NA
  if(is.numeric(B.groups)==FALSE) B.groups = as.numeric(as.factor(Q.groups))
  whichPop.names=paste(":", whichPop, sep="") #this will get reset if whichPop=NA
  if(is.numeric(whichPop)==FALSE) whichPop = as.numeric(as.factor(whichPop))
  
############# Set the values for whichPop, Q.groups, U.groups, R.groups if they were not passed in
########################################################### 
  default = length(whichPop)==1 & is.na(whichPop[1]); if(default) {whichPop = seq(1,n); whichPop.names=paste(":",whichPop,sep="") }
  m = max(whichPop)
  default = length(Q.groups) == 1 & is.na(Q.groups[1]) 
  if(default & varcov.Q != "equalvarcov") {Q.groups = seq(1,m); Q.names=paste(":",Q.groups,sep="")}
  if(default & varcov.Q == "equalvarcov") {Q.groups = rep(1,m); Q.names=paste(":",Q.groups,sep="") }
  default = length(U.groups) == 1 & is.na(U.groups[1]); if(default) {U.groups = seq(1,m); U.names=paste(":",U.groups,sep="")}
  #x00 and A groups are special because one would not normally set them, so only give them a name if user explicitly set them
  default = length(x00.groups) == 1 & is.na(x00.groups[1]); if(default) {x00.groups = seq(1,m); x00.names=""} 
  default = length(A.groups) == 1 & is.na(A.groups[1]); if(default) {A.groups = seq(1,n); A.names=""} 
  default = length(R.groups) == 1 & is.na(R.groups[1])
  if(default & varcov.R != "equalvarcov") {R.groups = seq(1,n); R.names=paste(":",R.groups,sep="")}
  if(default & varcov.R == "equalvarcov") {R.groups = rep(1,n); R.names=paste(":",R.groups,sep="") }
  default = length(B.groups) == 1 & is.na(B.groups[1]) 
  if(default) {B.groups = seq(1,m); B.names=paste(":",B.groups,sep="")}

############ Error check that the model is legal or consistent
###########################################################
if(FailedErrorCheck(y, whichPop, U.groups, x00.groups, A.groups, R.groups, Q.groups, B.groups, varcov.Q, varcov.R, B.constraint, Uinit, Qinit, Ainit, Rinit, Binit, x00, V00init,estInteractions, silent)) 
	stop("Model is illegal or not interally consistent")

############# Set the initial conditions and make them the correct dimensions
###########################################################
#make sure all passed in initial conditions have the dim attr set
Uinit=as.array(Uinit); Qinit=as.array(Qinit); Ainit=as.array(Ainit); Rinit=as.array(Rinit); x00=as.array(x00); as.array(V00init)

Ainit=array(Ainit,dim=c(n,1))
Uinit=array(Uinit,dim=c(m,1))
if(length(Qinit)==1 | length(Qinit)==m) Qinit = diag(c(Qinit),m,m)  #else Qinit is m x m so is ok
if(length(Rinit)==1 | length(Rinit)==n) Rinit = diag(c(Rinit),n,n)  #else Rinit is n x n so is ok
#if x00==NA then do a linear regression to get init values
x00.is.NA = length(x00)==1 & is.na(x00[1]); 
if(x00.is.NA) {
    x00init = array(0,dim=c(m,1))
     # do a simple linear regression, estimating the predicted value using the y that x will be scaled to (in case multiple y per x)
     for(i in 1:m) {
    	   this.index = min(which(whichPop==i))
         y.obs = y[this.index,]
         y.obs[which(y.obs==-99)] = NA
         x00init[i] = lm(y.obs~seq(1:numYrs))$fitted.values[1]
         }
  }
else x00init=array(x00,dim=c(m,1))
if(length(V00init)==1 | length(V00init)==m) V00init = diag(c(V00init),m,m)  #otherwise V00init is m x m
#if "fixed", then user passed in a B that they want used
#if(B.constraint=="fixed") B will stay fixed at Binit 
if(B.constraint!="fixed") Binit = makediag(1,m)   

############# Construct the various design matrices
# Z relates observation time series to state processes
if(Z.constraint=="fixed") Z = Zinit #the user passes this in
else {  #Z is constructed and Zinit is not used
  Z = matrix(0,n,m) # this will be filled 0s and 1s...if each site is unique, Z = diag(n)
  for(i in 1:m) Z[which(whichPop==i),i] <- 1
  }

# Set up the grouping structure among different state processes and the different observation time se
A.numGroups <- max(A.groups)
U.numGroups <- max(U.groups)
x00.numGroups <- max(x00.groups)
Q.numGroups <- max(Q.groups)
R.numGroups <- max(R.groups)
B.numGroups <- max(B.groups)

# Create matrices of 0s and 1s for each, to later be used for multiplication   
ZA = matrix(0,n,A.numGroups)   # matrix to allow shared As
for(i in 1:A.numGroups) ZA[which(A.groups == i),i] <- 1
ZU = matrix(0,m,U.numGroups)   # matrix to allow shared growth rates
for(i in 1:U.numGroups) ZU[which(U.groups == i),i] <- 1 
Zx00 = matrix(0,m,x00.numGroups)   # matrix to allow shared x00 values
for(i in 1:x00.numGroups) Zx00[which(x00.groups == i),i] <- 1
ZQ <- matrix(0,m,Q.numGroups)  # matrix to allow shared process variances
for(i in 1:Q.numGroups) ZQ[which(Q.groups==i),i] <- 1 
ZR <- matrix(0,n,R.numGroups)  # matrix to allow shared measurement errs
for(i in 1:R.numGroups) ZR[which(R.groups==i),i] <- 1 
ZB <- matrix(0,m,B.numGroups)  # matrix to allow shared B terms along the diagonal
for(i in 1:B.numGroups) ZB[which(B.groups==i),i] <- 1 


# Print out the model structure as a check for the user
if(!silent) {
  modelStruc = paste("Model Structure is\n","m: ",m," state process(es)\n","n: ",n," observation time series\n",sep="")
  modelStruc = paste(modelStruc,"whichPop: Observation time series assigned to state processes as ",paste(whichPop,collapse=" "),"\n",sep="")
  if(m > 1) modelStruc = paste(modelStruc,"U.groups: State process growth rates assigned to groups as ",paste(U.groups,collapse=" "),"\n",sep="")
  if(m > 1 & varcov.Q == "unconstrained")
    modelStruc = paste(modelStruc,"Q: Process errors have an unconstrained m x m variance-covariance matrix\n",sep="")
  if(m > 1 & varcov.Q == "equalvarcov"){
    modelStruc = paste(modelStruc,"Q: Process errors have an m x m variance-covariance matrix with equal variances and equal covariances\n",sep="")
    modelStruc = paste(modelStruc,"Q.groups argument (if passed in) was ignored\n",sep="")
    }
  if(m > 1 & varcov.Q == "equalvarcov.groups"){
    modelStruc = paste(modelStruc,"Q: Process errors have an m x m variance-covariance matrix with equal variances and equal covariances\n",sep="")
    modelStruc = paste(modelStruc,"in groups (covariance set to zero outside groups)\n",sep="")
    }
  if(m > 1 & varcov.Q == "diagonal"){
    modelStruc = paste(modelStruc,"Q: Process errors are uncorrelated and have a diagonal var-cov matrix.\n",sep="")
    modelStruc = paste(modelStruc,"Q.groups: State process variances assigned to groups as ",paste(Q.groups,collapse=" "),"\n",sep="")
    }
  if(varcov.R == "unconstrained")
    modelStruc = paste(modelStruc,"R: Observation errors have an unconstrained n x n variance-covariance matrix\n",sep="")
  if(varcov.R == "equalvarcov"){
    modelStruc = paste(modelStruc,"R: Observation errors have an n x n variance-covariance matrix with equal variances and equal covariances\n",sep="")
    modelStruc = paste(modelStruc,"R.groups argument (if passed in) was ignored\n",sep="")
    }
  if(varcov.R == "diagonal"){
    modelStruc = paste(modelStruc,"R: Observation errors are uncorrelated and have a diagonal var-cov matrix.\n",sep="")
    modelStruc = paste(modelStruc,"R.groups: Observation variances assigned to groups as ",paste(R.groups,collapse=" "),"\n",sep="")
    }
  if(x00prior == FALSE){
    modelStruc = paste(modelStruc,"x00 is treated as fixed but unknown (estimated). V00=0 (but set larger for the EM algorithm. See help file).\n",sep="")
    }
  else modelStruc = paste(modelStruc,"x00 and V00 specify the (fixed) prior.  x00 is not estimated.\n",sep="")
  if(V00init[1]<0.5){
    modelStruc = paste(modelStruc,"Warning: V00init is small. This will converge slowly (and not at all if V00init=0). See notes in help file.\n",sep="")
    }
  cat(modelStruc)
}

#############Adapt the Z matrix for handling missing values
# include is a n x m matrix to handle missing values; 1 matrix for each year;
# if a data point in time series j in yr i is missing, element j in the diagonal of the include matrix for year i is set to 0
###########################################################
if(Z.constraint=="fixed") include=Zinit
else {
  include <- array(0, dim=c(n,m,numYrs))
  for(i in 1:numYrs) 
    include[,,i] <- makediag(ifelse(y[,i]!=-99,1,0),nrow=n)%*%Z  #include = 1 when data exists, 0 otherwise
  miss <- ifelse(y!=-99,0,1)     # 0=observed, 1=missing
  notmiss <- ifelse(y!=-99,1,0)     # 1=observed, 0=missing
  y[which(y == -99)] <- 0   # Clear out -99s in data; include has been set up so that these values will be ignored
}

#############Start the EM algorithm which is going to keep updating until
# it converges
###########################################################
#Set the first (or initial) values for the parameters
U=Uinit; Q=Qinit; A=Ainit; R=Rinit; x00=x00init; V00=V00init; B=Binit

#############Create record of each variable over all iterations
########################################################### 
if(debugit)
  iter.record <- list(U = array(U,dim=c(m,1)), Q = array(Q,dim=c(m,m,1)), 
    A = array(A,dim=c(n,1)), R = array(R,dim=c(n,n,1)), 
		x00 = array(x00,dim=c(m,1)), loglike = array(-10000, dim=c(1, 1)) )
else
  iter.record <- "Set debugit=TRUE to return iter.record"

#############If we don't do the MC init part, then jump right into the EM algorithm
###########################################################
if(MonteCarloInit == FALSE) {
   # use values passed in by user
   numInits = 0
   numInitSteps = 0
}

drawProgressBar = FALSE #If the time library is not installed, no prog bar
if("time" %in% installed.packages()[,1]){ #then try to install time
if(require(time)==TRUE & MonteCarloInit == TRUE & !silent) { #then we can draw a progress bar
    cat("\n"); cat("> Starting Monte Carlo Initializations\n")
    prev = progressBar()
    drawProgressBar = TRUE
} }
# The first numInits iterations of this loop represent the MCinitialization phase (if MCInit=T),
# the last iteration is the final estimation (if MCInit=F, then goes straight to the last)
bestLL = -1.0e10 #this is only used if MonteCarloInit == TRUE
for(loop in 1:(numInits + 1)) {
   #reset for each loop
   loglike.old <- -10000
   cvg <- 1+tol

   # this determines # of iterations, if in MCInit, then iter = numInitSteps otherwise max.iter
   numIter = ifelse(loop <= numInits,numInitSteps,max.iter)   
   if(MonteCarloInit == TRUE) { # draw random initial conditions / parameter values
       if(U.constraint!="fixed") U = array(runif(m,U.bounds[1],U.bounds[2]),dim=c(m,1))
       Q = makediag(exp(runif(m,logQ.bounds[1],logQ.bounds[2])),nrow=m)
       R = makediag(exp(runif(n,logR.bounds[1],logR.bounds[2])),nrow=n)
       x.lo = ifelse(x00init>0,0.75*x00init,1.25*x00init)
       x.hi = ifelse(x00init>0,1.25*x00init,0.75*x00init)
       x00 = array(runif(m,x.lo,x.hi),dim=c(m,1))
     	 if(drawProgressBar==TRUE) prev = progressBar(loop/numInits,prev)
   }
   # Some initial start draws of the params are so awful that loglike will go down.  We want to discard these cases.
   validDraw = TRUE

   # If we've exceeded the initialization phase, use the best params at end of MCInit for starting final estimation
   if(loop > numInits & MonteCarloInit == TRUE) {
      Q = best.Q
      R = best.R
      U = best.U
      A = best.A
      B = best.B
      x00 = best.x00
      if(!silent) {    cat("\n"); cat("> Starting EM Maximization Routine\n")}
   }

   for(iter in 1:numIter) { 
   		iter.model=list( U=U, Q=Q, R=R, A=A, B=B, x00=x00, V00=V00, include=include, kf.data=y )  
      kf <- Kfilter(iter.model) #call kalman filter/smoother to get x(t)|y(1:T) etc estimates
      loglike.new = kf$loglik
   
      if(iter>1 && is.finite(loglike.old) == TRUE && is.finite(loglike.new) == TRUE ) cvg = loglike.new - loglike.old  ## cvg is improvement in likelihood
		  #If loglike goes down, set validDraw to FALSE
      if(cvg<0) validDraw = FALSE else validDraw = TRUE
      #The routine can produce cvg<0 for iter = 2, because U(iter=2) will use Q(iter=1) which is Qinit.  Qinit is not from a fit to the
      #data, so U(iter=2) might not increase the LL; after iter=2 however, the LL should not go down
      if(iter > 2 & cvg<0 & loop > numInits) print(paste("iter=",iter," LogLike DROPPED: This should not happen.",sep=""))
   	  # if converged, break out
	    if(cvg >= 0 & cvg < tol) break
      
		  # Store loglike for comparison to new one after parameters are updated
		  loglike.old = loglike.new
    
    	################# M STEP Re-estimate U,Q,A,R,B,X00 via ML given x(t) estimate
    	################
    	# Get new A subject to its constraints (update of R will use this)
    	# See notes by EH for derivation
    	##############################################################
    	A.last.iter <- A     
      A=array(0,dim=c(n,1));  #A will be zero except when there are multiple observation time series for a single state process
      # any state process with multiple observation time series will have 1st A set to 0 and rest estimated
      tableA <- as.numeric(table(whichPop))  #figure out which state processes have more than 1 observation time series
      numSubsWith2Sites = which(tableA > 1)
      if(length(numSubsWith2Sites) > 0) {  # added by EW 07/19 to catch the case where A not estimated
          for(subpops in 1:length(numSubsWith2Sites)) {  # loop over all state processes with more than 1 observation time series
           	this.sub = numSubsWith2Sites[subpops]
           	this.n = tableA[this.sub]
           	this.index = which(whichPop==this.sub)
            sum1 <- 0
           	for (i in 1:numYrs) {
            	include.Y <- include[this.index,this.sub,i]   # include is the matrix of 0s/1s for all years, include.Y is the matrix for a single year
            	dim(include.Y) <- c(this.n,1)             #this is just for one state process so nx1 
            	A.if.y.missing = makediag(miss[this.index,i],nrow=this.n)%*%A.last.iter[this.index]   #this is going to give me 0 if have val and old A if not
            	A.if.y.present = array(y[this.index,i],dim=c(this.n,1))-include.Y %*% kf$xtT[this.sub,i]
            	sum1 <- sum1 + A.if.y.present + A.if.y.missing
            	}	 #end for numYrs loop
        		As = sum1/numYrs
            As = As - As[1]
            A[this.index] = As
        	} # end for subpop loop
          A <- as.vector(c(ZA%*%(colSums(makediag(A,nrow=n)%*%ZA) * 1/colSums(ZA))) )           
      } # end if

    	################
    	# EH: If we treat x00 as fixed and unknown, we estimate it.
      # Get new x00, V00 cannot be estimated so it is fixed
    	# S&S Eqn 4.78
    	# If x00 is a known prior, we don't estimate.
    	################################################################
      if(x00prior==FALSE) { 
        x00 = c(kf$x0T)
        x00 = as.vector(c(Zx00%*%(colSums(makediag(x00,nrow=m)%*%Zx00) * 1/colSums(Zx00))) )   #impose grouping
        x00 = array(x00, dim=c(m,1))
        }

     	################
    	# Get new U subject to its constraints (update of Q and B will use this)
    	# See notes by EH and Rich Hinrichsen for derivation
    	################################################################
      # if m=1 or m=n this is going to reduce to U <- (kf$xtT[,numYrs]-kf$xtT[,1])/(numYrs-1)
    	# if some state processes share a u, then we need to take the average across processes sharing a u, taking into account the variance of each process
      X1B0 = 0
      for (i in 2:numYrs) {
        X1B0 = X1B0 + kf$xtT[,i] - B%*%kf$xtT[,i-1]
      }      
      Qinv = chol2inv(chol(Q))    # this is calculated here because used twice below      
      Qinv = (Qinv+t(Qinv))/2     #enforce symmetry
      numer = t(ZU)%*%Qinv%*%X1B0
      denom = chol2inv(chol( t(ZU)%*%Qinv%*%ZU ) )
      if(U.constraint!="fixed") U = ZU%*%(denom%*%numer) / (numYrs-1)
   
    	################
    	# Get new R subject to its constraints (updated  07.09.08 by EH to fix R est bug and by EW to add grouping)
    	# S&S 4.77 with addition of grouping and diagonal constraint variants
    	################################################################
    	sum1 <- 0
    	for (i in 1:numYrs) {
        include.Y <- include[,,i]   # include is the matrix of 0s/1s for all years, include.Y is the matrix for a single year
        dim(include.Y) <- c(n,m)             #stops R from changes the dimensions when m=1 
        include.Y2 <- array(notmiss[,i],dim=c(n,1))
        # this is the updating equation for R
        # EH 7.10.08 THIS IS THE PART THAT IS PREVENTING ESTIMATION OF R COVARIANCES WHEN THERE ARE MISSING VALUES
        R.if.y.missing <- makediag(miss[,i],nrow=n)%*%R   #this is going to give me 0 if have val and R from last iteration if not
        err <- y[,i]-(include.Y %*%kf$xtT[,i]+include.Y2*A)  #residuals: this will be zero when y[j] is missing]
        R.if.y.present <- err%*%t(err) + include.Y %*% kf$VtT[,,i] %*% t(include.Y) #updated R estimate if have y data
        sum1 <- sum1 + R.if.y.present + R.if.y.missing 
    	} #end for loop
    	sum1=(sum1+t(sum1))/2 #enforce symmetry;
    	R_unconstrained = sum1/numYrs #this provides the estimate of the R matrix with diagonal and non-diagonal elements
      #The inv functions don't work well when the diagonal elements get lower than the machine tol, so put a min on those
    	#this could break if R_unconstrained happens to equal an integer
	    if(length(R_unconstrained)==1)
		     R_unconstrained <- max(sqrt(.Machine$double.eps),R_unconstrained)
	    else
		     diag(R_unconstrained)[which(takediag(R_unconstrained) 1) #this will get the average to use for the shared covariance value
            Roffdiag <- (1/(n*(n-1)))*(sum(R_unconstrained)-sum(takediag(R_unconstrained)))
         Roffdiag <- Roffdiag*(matrix(1,n,n)-makediag(1,n))
         R <- Rdiag + Roffdiag
    	} #if R is equal var and equal cov
    
    	# S10, S00, and S11 calculations
      # This is a little different than S&S1 Eqn 4.76 (S&S2 Eqn 6.67-69); theirs is based on the likelihood
    	# as written in Eqn 4.69 (6.64) & and Harvey 4.2.19
    	# We don't have a prior on x0 and what you set V00 at affects the Q M-step calculation
    	# to its detriment.  
    	# Instead I'm using the likelihood calculation from Ghahramani and Hinton which is does 
    	# the sum for the Q bit from 2 to T not 1 to T; So leaves out x00 and V00; see notes by EH
    	# This seems to work better although we should be able to rewrite this with V00=0 for the case where x00 is treated
    	# as fixed but unknown.
    	# S&S and Harvey would start here
    	# S00 <- kf$V0T + kf$x0T%*%t(kf$x0T)
      # S11 <- kf$VtT[,,1] + (kf$xtT[,1]-U)%*%t(kf$xtT[,1]-U)
      # S10 <- kf$Vtt1T[,,1] + (kf$xtT[,1]-U)%*%t(kf$x0T);
      # I switch on 7/22/08 to this since it seems less sensitive to V00 and finds Q with max L with lower tol setting
      # with S&S it climbs Q at more slowly and cvg hits tol before max is reached
    	# put x00 update earlier
      
      # S00 <- V00 + x00%*%t(x00)
      # S11 <- kf$VtT[,,1] + (kf$xtT[,1]-U)%*%t(kf$xtT[,1]-U)
      # S10 <- kf$Vtt1T[,,1] + (kf$xtT[,1]-U)%*%t(x00);

    	################################################################
   		iter.model=list( U=U, Q=Q, R=R, A=A, B=B, x00=x00, V00=V00, include=include, kf.data=y )  
      kf <- Kfilter(iter.model) #call kalman filter/smoother to get x(t)|y(1:T) etc estimates
      loglike.new = kf$loglik
      S00 = 0; S11 = 0; S10 = 0; X1 = 0; X0 = 0
      for (i in 2:numYrs) {
        S00 = S00 + (kf$VtT[,,i-1] + kf$xtT[,i-1]%*%t(kf$xtT[,i-1]));   #sum 2:T E(xt1T%*%t(xt1T))
        S10 = S10 + (kf$Vtt1T[,,i] + kf$xtT[,i]%*%t(kf$xtT[,i-1]));     #sum 2:T E(xtT%*%t(xt1T))
        S11 = S11 + (kf$VtT[,,i] + kf$xtT[,i]%*%t(kf$xtT[,i]));         #sum 2:T E(xtT%*%t(xt1T))
        X0 = X0 + kf$xtT[,i-1]                                          #sum 2:T E(xt1T)
        X1 = X1 + kf$xtT[,i]                                            #sum 2:T E(xtT)
      }

      ################
    	# Get new Q subject to its constraints
    	################################################################
      Q_unconstrained = (S11 - B%*%t(S10) - S10%*%t(B) + B%*%S00%*%t(B)
        -U%*%t(X1) - X1%*%t(U) + U%*%t(B%*%X0) + B%*%X0%*%t(U))/(numYrs-1) + U%*%t(U);    
      # don't use (S11 - B %*% t(S10))/(numYrs-1) like in S&S because that is just for certain cases of the process eqn.

      #The inv functions don't work well when the diagonal elements get lower than the machine tol, so put a min on those
	    if(length(Q_unconstrained)==1)
	       Q_unconstrained <- max(sqrt(.Machine$double.eps),Q_unconstrained)
	    #else diag(Q_unconstrained)[which(diag(Q_unconstrained) 1) 
            Qoffdiag <- (1/(m*(m-1)))*(sum(Q_unconstrained)-sum(takediag(Q_unconstrained)))
         Qoffdiag <- Qoffdiag*(matrix(1,m,m)-makediag(1,m))
         Q <- Qdiag + Qoffdiag;
      }   #if equal variances and covariances
      if(varcov.Q == "equalvarcov.groups") { 
        Qdiag = takediag(Q_unconstrained)
         Qoffdiag <- array(0,dim=c(m,m))
         for(qgrp in 1:Q.numGroups) {
          whichInGroup = which(Q.groups==qgrp)
          Qdiag[whichInGroup] = mean(takediag(Q_unconstrained)[whichInGroup])  #average
          numInGroup=length(whichInGroup)         
          if(numInGroup > 1) 
            Qoffdiag[whichInGroup,whichInGroup] = (1/(numInGroup*(numInGroup-1)))*
              (sum(Q_unconstrained[whichInGroup,whichInGroup])-sum(takediag(Q_unconstrained[whichInGroup,whichInGroup])))
         }
         if(m>1) diag(Qoffdiag)=0  #the if stops R from creating an Qoffdiag x Qoffiag matrix is Qoffdiag happens to be an interger
         Q <- makediag(Qdiag) + Qoffdiag;
      }   #if equal variances and covariances in groups

      ################
    	# EW: Get new B subject to its constraints
    	################################################################
      if(B.constraint!="fixed" & B.constraint!="identity") {
        iter.model=list( U=U, Q=Q, R=R, A=A, B=B, x00=x00, V00=V00, include=include, kf.data=y )  
        kf <- Kfilter(iter.model) #call kalman filter/smoother to get x(t)|y(1:T) etc estimates
        loglike.new = kf$loglik
        S00 = 0; S11 = 0; S10 = 0; X1 = 0; X0 = 0
        for (i in 2:numYrs) {
          S00 = S00 + (kf$VtT[,,i-1] + kf$xtT[,i-1]%*%t(kf$xtT[,i-1]));   #sum 2:T E(xt1T%*%t(xt1T))
          S10 = S10 + (kf$Vtt1T[,,i] + kf$xtT[,i]%*%t(kf$xtT[,i-1]));     #sum 2:T E(xtT%*%t(xt1T))
          S11 = S11 + (kf$VtT[,,i] + kf$xtT[,i]%*%t(kf$xtT[,i]));         #sum 2:T E(xtT%*%t(xt1T))
          X0 = X0 + kf$xtT[,i-1]                                          #sum 2:T E(xt1T)
          X1 = X1 + kf$xtT[,i]                                            #sum 2:T E(xtT)
        }
        B_unconstrained = (S10-U%*%t(X0))%*%chol2inv(chol(S00)) 
        }
      if(B.constraint == "unconstrained") B <- B_unconstrained 
      if(B.constraint == "diagonal") { #allows shared B's along diagonal
         if(varcov.Q=="diagonal") {
            B=makediag( takediag(S10-U%*%t(X0))/takediag(S00) )
            B <- makediag(c(ZB%*%(colSums(B%*%ZB) * 1/colSums(ZB))),nrow=m) #ZB allows grouping of Bs
            }
         else {
            stop("Currently Kalman-EM can't estimate B diagonal if Q not diagonal too")
            # EEH: I have not figured out the update equation for this case.
            }
         
         }
       
    	################
    	# Keep a record of the iterations; this is to allow user to debug if they are having problems with convergence
    	# Written this way since we don't know ahead of time how long the record will be
			# that depends on how quickly it converges
			################################################################
    	if(debugit & loop > numInits){
        iter.record$R = array(c(iter.record$R,R),dim=c(n,n,iter+1))   
        iter.record$U  <- array(c(iter.record$U,U),dim=c(m,iter+1))
        iter.record$x00  <- array(c(iter.record$x00,x00),dim=c(m,iter+1))
        iter.record$Q <- array(c(iter.record$Q,Q),dim=c(m,m,iter+1))
        iter.record$A <- array(c(iter.record$A,A),dim=c(n,iter+1))
        iter.record$loglike <- c(iter.record$loglike,loglike.new)
      }
   }  # end inner iter loop

   # If doing a MCInit, check whether the likelihood is the best observed
   # Only use bootstrap param draws where loglike did not go down during numInitSteps
   if(validDraw == TRUE & MonteCarloInit == TRUE & loglike.new > bestLL) {
      # update the best initial parameter estimates
      best.Q = Q
      best.R = R
      best.U = U
      best.A = A
      best.B = B
      best.x00 = x00
      bestLL = loglike.new
   }
   
}   # end iter loop that is running until the EM algorithm converges

# Fix the initial loglike from -10000
if(debugit){
  iter.record$loglike[1] <- iter.record$loglike[2]
}
if(!silent){
	cat(paste("Finished in ",iter," interations. Max.iter was ",max.iter,".\n",sep=""))
}
# confidence intervals based on state std errors, see caption of Fig 6.3 (p337) Shumway & Stoffer
#if(m==1) states.se = sqrt(kf$VtT[,,1:numYrs])
#if(m > 1) {
   states.se = matrix(0,nrow=m,ncol=numYrs)
   for(i in 1:numYrs) states.se[,i] = t(sqrt(takediag(kf$VtT[,,i])))
#}
# This calculates the correct likelihood when x00 is estimated, that is treated as fixed but unknown
# Since it is fixed, V00=0 but we can't run the EM algorithm with V00=0 or it would not converge
if(x00prior==FALSE) {
  V00=0*V00       #if x00prior==FALSE, then x00 is treated as fixed but unknown and V00=0
  iter.model=list( U=U, Q=Q, R=R, A=A, B=B, x00=x00, V00=V00, include=include, kf.data=y )  
  kf = Kfilter(iter.model); 
  loglike=kf$loglik
  }
else loglike=loglike.new
   
# AIC calculation
B.params = ifelse(!(B.constraint %in% c("fixed","identity")), m * m, 0) 
if(x00prior==FALSE) x00.params = length(unique(as.vector(x00)) ) else x00.params=0
Q.params = unique(as.vector(Q))
Q.params = length(Q.params[Q.params != 0])
R.params = unique(as.vector(R))
R.params = length(R.params[R.params != 0])
U.params = length(unique(as.vector(U)))
A.params = unique(as.vector(A))
A.params = length(A.params[A.params != 0])
K = x00.params + B.params + Q.params + R.params + U.params + A.params
AICval = -2*loglike + 2*K
AICc = ifelse(sum(include)>(K+1),-2*loglike + 2*K*(sum(include)/(sum(include)-K-1)),"NA, data points less than K+1")

# make sure R change the dimensions of any of the matrices- 01/20/09
if(is.matrix(B)==FALSE) B = as.matrix(B)
if(is.matrix(Q)==FALSE) Q = as.matrix(Q)
if(is.matrix(R)==FALSE) R = as.matrix(R)
if(is.matrix(V00)==FALSE) V00 = as.matrix(V00)
U=array(U,dim=c(m,1))
A=array(A,dim=c(n,1))
x00=array(x00,dim=c(m,1))

# fix the names to the user specified names 
state.names = levels(as.factor(whichPop))
rownames(U) = paste(state.names,U.names,sep="")
rownames(A) = paste(site.names,whichPop.names, sep="")
rownames(x00) = paste(state.names,x00.names,sep="")
rownames(V00) = state.names
colnames(V00) = state.names
rownames(Q) = paste(state.names,Q.names,sep="")
colnames(Q) = paste(state.names,Q.names,sep="")
rownames(R) = paste(site.names,R.names,whichPop.names,sep="")
colnames(R) = paste(site.names,R.names,whichPop.names,sep="")
rownames(B) = state.names
colnames(B) = state.names
rownames(kf$xtT) =  state.names

return(list(states=kf$xtT,states.se=states.se,A=A,B=B,Q=Q,R=R,U=U,Z=Z,x00=x00,V00=V00,V0T=kf$V0T,Kt=kf$Kt,Innov=kf$Innov,Sigma=kf$Sigma,m=m, n=n, 
numYrs=numYrs,iter=iter,loglike=as.vector(loglike), AIC=as.vector(AICval), AICc=as.vector(AICc), K=as.vector(K), iter.record=iter.record, 
whichPop=whichPop, R.groups=R.groups, Q.groups=Q.groups, U.groups=U.groups, varcov.R=varcov.R, varcov.Q=varcov.Q, 
estInteractions=estInteractions, x00prior=x00prior, data=orig.data, kf.data=y, include=include, version=vers))

}  #End of the KalmanEM function

#######################################################################################################
#   Kfilter
#######################################################################################################
Kfilter = function(mssm.model) {
     # EH: This returns the output from the  Kalman filter and smoother
     # ** All eqn refs are to 2nd ed of Shumway & Stoffer (2006): Time Series Analysis and Its Applications

		#attach would be risky here since user might have one of these variables in their workspace
		U=mssm.model$U; Q=mssm.model$Q; R=mssm.model$R; A=mssm.model$A; Phi=mssm.model$B; x00=mssm.model$x00; V00=mssm.model$V00
		include=mssm.model$include;  #this is the design matrix, called A in S&S, with 0s at missing values
		y=mssm.model$kf.data  #kf.data has missing values replaced by 0 and is n x T (not T x n)
    n=dim(y)[1]; numYrs=dim(y)[2]; m=dim(as.matrix(Q))[1]
    
    #initialize - these are for the forward, Kalman, filter
    # for notation purposes, 't' represents current point in time, 'T' represents the length of the series
    Vtt <- array(0,dim=c(m,m,numYrs))     # Analagous to S&S Ptt, E[Vtt|y(1:t)]
    Vtt1 <- array(0,dim=c(m,m,numYrs))    # Analagous to S&S Ptt1, E[Vtt1|y(1:t)]
    xtt <- array(0,dim=c(m,numYrs))       # E[x(t) | Y(t)]
    xtt1 <- array(0,dim=c(m,numYrs))      # E[x(t) | Y(t-1)]
    innov <- array(0,dim=c(n,numYrs))     # these are innovations, parentheses in 6.21 
    vt <- array(0,dim=c(n,numYrs))        # used for likelihood, vt equivalent to epsilon, eqn 6.62
    Ft <- array(0,dim=c(n,n,numYrs))      # used for likelihood, Ft equivalent to sigma matrix eqn 6.62
    # these are for the backwards, Rausch, filter
    VtT <- array(0,dim=c(m,m,numYrs))     # E[Vtt|y(1:T)]
    J <- array(0,dim=c(m,m,numYrs))       # see eqn 6.49
    Vtt1T <- array(0,dim=c(m,m,numYrs))   # E[Vtt1|y(1:T)]
    xtT <- array(0,dim=c(m,numYrs))       # E[x | y(1:T)]
    Kt <- array(0, dim=c(m,n,numYrs))     # 3D matrix of Kalman gain, EW added 11/14/08
    
    #forward pass gets you E[x(t) given y(1:t)]
    xtt1[,1] <- Phi%*%x00 + U        # eqn 6.19
    Vtt1[,,1] <- Phi%*%V00%*%t(Phi) + Q          # eqn 6.20
    include.Y <- include[,,1]     # used for missing data
    dim(include.Y) <- c(n,m)  #stops R from changes the dimensions when m=1 
    include.Y2 <- (include.Y%*%array(1,dim=c(m,1))) # EH: create include.Y for A that is nx1
    # R needs to be modified per eqn 6.78 to accommodate missing values when R is not diagonal
    siginv = include.Y%*%Vtt1[,,1]%*%t(include.Y)+R    # bracketed piece of eqn 6.23
    siginv=chol2inv(chol(siginv))         # now siginv is sig[[i]]^{-1}       
    siginv = (t(siginv)+siginv)/2 #Vtt1 happens to be symmetric since it is V00+Q; although in general E(xt t(xt1)) is not symmetric
    Kt[,,1] <- Vtt1[,,1]%*%t(include.Y) %*% siginv;    #broke siginv to impose symmetry, eqn 6.23

    innov[,1] <- y[,1] - (include.Y%*%xtt1[,1] + include.Y2*A)
    xtt[,1] <- xtt1[,1] + Kt[,,1]%*%innov[,1];   # eqn 6.21
    Vtt[,,1] <- Vtt1[,,1]-Kt[,,1]%*%include.Y%*%Vtt1[,,1];      # eqn 6.22, detail after 6.28
    Vtt[,,1] <- (Vtt[,,1]+t(Vtt[,,1]))/2;       #Vtt must be symmetric
    # The following are needed for the likelihood calculation
		# the missing values will contribute 0.0 for the LL calc
    # R_mod is needed for the corrected likelihood calculation when there are missing values
		# See section 12.3 in Time Series: Theory and Methods (1991) Peter J. Brockwell, Richard A. Davis
		include.Y3 <- diag(n)-makediag(include.Y2)  #put 1's on the diagonal where there are missing values
    R_mod = include.Y3 + t(makediag(include.Y2)) %*% R %*% makediag(include.Y2)
    vt[,1] <- y[,1]-(include.Y%*%xtt1[,1]+include.Y2*A) #3.3.4; need to hold on to this for loglik calc
    Ft[,,1] <- include.Y%*%Vtt1[,,1]%*%t(include.Y)+R_mod #need to hold on to this for loglik calc
    Ft[,,1] <- (Ft[,,1]+t(Ft[,,1]))/2; #to ensure its symetric

    for (t in 2:numYrs) {
       xtt1[,t] <- Phi%*%xtt[,t-1] + U  #xtt1 denotes x_t^(t-1), eqn 6.19
       Vtt1[,,t] <- Phi%*%Vtt[,,t-1]%*%t(Phi) + Q                  # eqn 6.20
       Vtt1[,,t] <- (Vtt1[,,t]+t(Vtt1[,,t]))/2   #in general Vtt1 is not symmetric but here it is since Vtt and Q are
       include.Y <- include[,,t]  
       dim(include.Y) <- c(n,m)  #stops R from changes the dimensions when m=1  
	     include.Y2 <- (include.Y%*%array(1,dim=c(m,1))) # I'm futzing with the include.Y for A to make it nx1
       # R needs to be modified per eqn 6.78
       Kt[,,t] <- Vtt1[,,t]%*%t(include.Y) %*% chol2inv(chol(include.Y%*%Vtt1[,,t]%*%t(include.Y)+R))   # eqn 6.23
       innov[,t] <- y[,t] - (include.Y%*%xtt1[,t] + include.Y2*A)
       xtt[,t] <- xtt1[,t] + Kt[,,t]%*%innov[,t]   # eqn 6.21
       Vtt[,,t] <- Vtt1[,,t]-Kt[,,t]%*%include.Y%*%Vtt1[,,t]                             # eqn 6.22, detail after 6.28
       Vtt[,,t] <- (Vtt[,,t]+t(Vtt[,,t]))/2 #to ensure its symetric
       # Variables needed for the likelihood calculation; see comments above
    	 include.Y3 <- diag(n)-makediag(include.Y2)
       R_mod = include.Y3 + t(makediag(include.Y2)) %*% R %*% makediag(include.Y2)
       vt[,t] <- y[,t]-(include.Y%*%xtt1[,t]+include.Y2*A) #need to hold on to this for loglik calc
       Ft[,,t] <- include.Y%*%Vtt1[,,t]%*%t(include.Y)+R_mod #need to hold on to this for loglik calc
       Ft[,,t] <- (Ft[,,t]+t(Ft[,,t]))/2 #to ensure its symetric
    }
    KT <- Kt[,,t];

    #indexing is 0 to T for the backwards (Rausch) recursions
    #backward pass gets you E[x(t)|y(1:T)] from E[x(t)|y(1:t)]
    xtT[,numYrs] <- xtt[,numYrs]  
    VtT[,,numYrs] <- Vtt[,,numYrs]
    s <- seq(numYrs,2)
    for(i in 1:(numYrs-1)) {
        yr <- s[i]   #equivalent to T:-1:0  
        Vinv <- chol2inv(chol(Vtt1[,,yr]))
        Vinv <- (Vinv + t(Vinv))/2 #to enforce symmetry after chol2inv call
        J[,,yr-1] <- Vtt[,,yr-1]%*%t(Phi)%*%Vinv     # eqn 6.49
        xtT[,yr-1] <- xtt[,yr-1] + J[,,yr-1]%*%(xtT[,yr]-xtt1[,yr])     # eqn 6.47
        VtT[,,yr-1] <- Vtt[,,yr-1] + J[,,yr-1]%*%(VtT[,,yr]-Vtt1[,,yr])%*%t(J[,,yr-1])  # eqn 6.48
        VtT[,,yr-1] <- (VtT[,,yr-1]+t(VtT[,,yr-1]))/2     #VtT is symmetric
    }
    Vinv <- chol2inv(chol(Vtt1[,,1]))
    Vinv <- (Vinv + t(Vinv))/2 #to enforce symmetry after chol2inv call
    J0 <- V00%*%t(Phi)%*%Vinv                      # eqn 6.49
    x0T <- x00 + J0%*%(xtT[,1]-xtt1[,1]);          # eqn 6.47
    V0T <- V00 + J0%*%(VtT[,,1]-Vtt1[,,1])*t(J0)   # eqn 6.48
    V0T <- (V0T+t(V0T))/2;

    #run another backward recursion to get E[x(t)x(t-1)|y(T)]
    include.Y <- include[,,numYrs]  
    dim(include.Y) <- c(n,m) #stops R from changes the dimensions when m=1  
    Vtt1T[,,numYrs] <- (makediag(1,m) - KT%*%include.Y)%*%Phi%*%Vtt[,,numYrs-1] #eqn. 6.55 this is Var(x(T)x(T-1)|y(T))
    s <- seq(numYrs,3)
    for (i in 1:(numYrs-2)) {
       yr <- s[i]
       Vtt1T[,,yr-1] <- Vtt[,,yr-1]%*%t(J[,,yr-2]) + J[,,yr-1]%*%(Vtt1T[,,yr]-Phi%*%Vtt[,,yr-1])%*%t(J[,,yr-2])   #eqn 6.56
    }
    Vtt1T[,,1] <- Vtt[,,1]%*%t(J0) + J[,,1]%*%(Vtt1T[,,2]-Phi%*%Vtt[,,1])%*%t(J0)

    #Calculate log likelihood, see eqn 6.62
    loglik <- -sum(include)/2*log(2*pi)
    for (i in 1:numYrs) {
        if(length(Ft[,,i])==1) detFt <- Ft[,,i] else detFt <- det(Ft[,,i])
        Ftinv <- chol2inv(chol(Ft[,,i]))
        Ftinv <- (Ftinv +t(Ftinv))/2 #enforce symmetry; Ft is symmetric
        loglik <- loglik - (1/2)%*%t(vt[,i]) %*% Ftinv %*% vt[,i] - (1/2)*log(detFt);
    }
    return(list(xtT = xtT, VtT = VtT, Vtt1T = Vtt1T, x0T = x0T, V0T = V0T, loglik = loglik, Vtt = Vtt, Vtt1 = Vtt1, J=J, Kt=Kt, xtt1 = xtt1, Innov=innov, Sigma=Ft))
}

###############################################################################################################################################
#   FailedErrorCheck
###############################################################################################################################################
FailedErrorCheck = function(y, whichPop, U.groups, x00.groups, A.groups, R.groups, Q.groups, B.groups, varcov.Q, varcov.R, B.constraint, Uinit, Qinit, Ainit, Rinit, Binit, x00, V00init,estInteractions, silent) {
############# Error check to make sure user isn't trying to set up an illegal or inconsistent model structure

#################### Check whichPop
n = dim(y)[1] #each row is obs ts
if(length(whichPop) != n) {
	cat("Error: whichPop is not in the correct form; it should be 1xn or nx1; \n")
        cat("n is the number of time series (i.e. columns) in your dataset.\n") 
        return(TRUE) }
m = max(whichPop)  # this is the number of state processes
if( FALSE %in% ( (1:m) %in% whichPop ) ) {
	cat("Error: Something is wrong with whichPop.\n")  
	cat(paste("max(whichPop) is ", m, " and each of 1:m should appear in whichPop.\n",sep=""))
        cat("But some of the 1:m are missing from your whichPop.\n")
        return(TRUE) } 

#################### Check varcov.Q
if( (varcov.Q %in% list("unconstrained", "diagonal", "equalvarcov", "equalvarcov.groups"))==FALSE ) {
	cat("Error: options for varcov.Q are unconstrained, diagonal or equalvarcov (passed as text in quotes) \n" ); return(TRUE) }
if( varcov.Q == "equalvarcov" & !( !(FALSE %in% (Q.groups == rep(1,m))) | (length(Q.groups==1) & is.na(Q.groups[1])) ) ) {
	cat("Error: Mismatch between varcov.Q and Q.groups.  varcov.Q=equalvarcov but Q.groups != rep(1,m) or NA. \n"); return(TRUE) }

#################### Check varcov.R
if( (varcov.R %in% list("unconstrained", "diagonal", "equalvarcov"))==FALSE ) {
	cat("Error: options for varcov.R are unconstrained, diagonal or equalvarcov (passed as text in quotes) \n" ); return(TRUE) }
if( (-99 %in% y) & varcov.R != "diagonal" ) {
	cat("Sorry! The current code cannot estimate observation error covariances when there are missing data; set varcov.R to diagonal \n"); return(TRUE) }
if( varcov.R == "equalvarcov" & !( !(FALSE %in% (R.groups == rep(1,n))) | (length(R.groups==1) & is.na(R.groups[1])) ) ) {
	cat("Error: Mismatch between varcov.R and R.groups.  varcov.R=equalvarcov but R.groups != rep(1,n) or NA. \n"); return(TRUE) }

################### B.constraint checked below after Binit is checked

#################### Check Q.groups
if(length(Q.groups) != m) {
	cat("Error: There is a mismatch between the number of state processes and length of Q.groups \n")
	cat("max(whichPop) must equal length(Q.groups) so that each state is assigned a Q group. \n")
	return(TRUE) 
 }
not.all.m.in.groups = FALSE %in% ( (1:m) %in% Q.groups )
if( not.all.m.in.groups & varcov.Q == "unconstrained" ) {
	cat("Sorry! The code cannot apply groupings of Qs except to diagonal or equalvarcov Q matrices \n"); return(TRUE) }
# Check that each state process is assigned to a Q group
if(FALSE %in% ((1:max(Q.groups)) %in% Q.groups) ) {
	cat(paste("Error: something is wrong with Q.groups You have ",max(Q.groups)," Q groups but not all appear in Q.groups \n",sep="")); return(TRUE) }

#################### Check U.groups length
U.groups=as.array(U.groups) #set dim attr
if(!(identical(floor(dim(U.groups)),floor(c(m,1))) | identical(floor(dim(U.groups)),floor(c(1,m))) | identical(floor(dim(U.groups)),floor(m)) )) {
	cat(paste("Error: U.groups should be length m. ","Model is m = ",m," as currently set up.\n",sep="")); return(TRUE) }
if(FALSE %in% ((1:max(U.groups)) %in% U.groups) ) {
	cat(paste("Error: something is wrong with U.groups. You have ",max(U.groups)," U groups but not all appear in U.groups. \n",sep="")); return(TRUE) }

#################### Check R.groups length
R.groups=as.array(R.groups) #set dim attr
if(!(identical(floor(dim(R.groups)),floor(c(n,1))) | identical(floor(dim(R.groups)),floor(c(1,n))) | identical(floor(dim(R.groups)),floor(n)) )) {
	cat("Error: There is a mismatch between the number of measurement time series and length of R.groups. \n")
	cat("The number of columns of data must equal length(R.groups) so that each time series (column) is assigned an R group. \n")
        return(TRUE) }
not.all.n.in.groups = !(setequal(1:n, R.groups))   #check that R.groups is some combo of 1:n (previous check made sure that length(R.groups)==n
if( not.all.n.in.groups & varcov.R == "unconstrained" ) {
	cat("Sorry! The code cannot apply R.groups except to diagonal R matrices; set varcov.R to diagonal. \n"); return(TRUE) }
if(!setequal(1:max(R.groups), R.groups)) {
	cat(paste("Error: something is wrong with R.groups. You have 1:",max(R.groups)," R groups but not all appear in R.groups.\n",sep="")); return(TRUE) }
	
#################### Check B.groups
if(length(B.groups) != m) {
	cat("Error: There is a mismatch between the number of state processes and length of B.groups \n")
	cat("max(whichPop) must equal length(B.groups) so that each state is assigned a B group. \n")
	return(TRUE) 
 }
not.all.m.in.groups = FALSE %in% ( (1:m) %in% B.groups )
if( not.all.m.in.groups & B.constraint == "unconstrained" ) {
	cat("Sorry! The code cannot apply groupings to B estimation except for diagonal B matrices \n"); return(TRUE) }   
#if B.constaint == "fixed" or "identity" then B.groups can be passed in.  It is used just for names on the B matrix.
# Check that each state process is assigned to a B group
if(FALSE %in% ((1:max(B.groups)) %in% B.groups) ) {
	cat(paste("Error: something is wrong with B.groups You have ",max(B.groups)," B groups but not all appear in B.groups \n",sep="")); return(TRUE) }

#################### Check A.groups
A.groups=as.array(A.groups) #set dim attr
if(!(identical(floor(dim(A.groups)),floor(c(n,1))) | identical(floor(dim(A.groups)),floor(c(1,n))) | identical(floor(dim(A.groups)),floor(n)) )) {
	cat("Error: There is a mismatch between the number of measurement time series and length of A.groups. \n")
	cat("The number of columns of data must equal length(A.groups) so that each time series (column) is assigned an A group. \n")
        return(TRUE) 
        }

#################### Check x00.groups
x00.groups=as.array(x00.groups) #set dim attr
if(!(identical(floor(dim(x00.groups)),floor(c(m,1))) | identical(floor(dim(x00.groups)),floor(c(1,m))) | identical(floor(dim(x00.groups)),floor(m)) )) {
	cat(paste("Error: x00.groups should be length m","Model is m = ",m," as currently set up.\n",sep="")); return(TRUE) }
                
############# Check that the initial values are correctly sized
###########################################################
Uinit=as.array(Uinit) #set dim attr
if(!(identical(floor(dim(Uinit)),floor(c(m,1))) | identical(floor(dim(Uinit)),floor(c(1,m))) | identical(floor(dim(Uinit)),floor(m)) | identical(floor(dim(Uinit)),floor(1)))) {
	cat("Error: Uinit should be scalar, m x 1 or 1 x m; Either your Uinit is wrong or your flags are setting m to something you are not expecting.\n"); return(TRUE) }
Qinit=as.array(Qinit)  #make sure dim attr is set
if(!(identical(floor(dim(Qinit)),floor(c(m,1))) | identical(floor(dim(Qinit)),floor(c(1,m))) 
  | identical(floor(dim(Qinit)),floor(m)) | identical(floor(dim(Qinit)),floor(1)) | identical(floor(dim(Qinit)),floor(c(m,m)))) ) {
	   cat("Error: Qinit should be scalar, 1 x m, m x 1 or m x m; Either your Qinit is wrong or your flags are setting m to something you are not expecting.\n"); return(TRUE) }
Ainit=as.array(Ainit) #set dim attr
if(!(identical(floor(dim(Ainit)),floor(c(n,1))) | identical(floor(dim(Ainit)),floor(c(1,n))) | identical(floor(dim(Ainit)),floor(n)) | identical(floor(dim(Ainit)),floor(1)))) {
	cat("Error:L Ainit should be scalar, n x 1 or 1 x n; Your Ainit is the wrong size.\n"); return(TRUE) }
Rinit=as.array(Rinit)  #make sure dim attr is set
if(!(identical(floor(dim(Rinit)),floor(c(n,1))) | identical(floor(dim(Rinit)),floor(c(1,n))) 
  | identical(floor(dim(Rinit)),floor(n)) | identical(floor(dim(Rinit)),floor(1)) | identical(floor(dim(Rinit)),floor(c(n,n)))) ) {
	   cat("Error: Rinit should be scalar, 1 x n, n x 1 or n x n; Either your Rinit is wrong or your flags are setting n to something you are not expecting.\n"); return(TRUE) }
Binit=as.array(Binit)  #make sure dim attr is set
if(!((length(Binit)==1 & is.na(Binit[1])) | identical(floor(dim(Binit)),floor(c(m,m)))) ) {
	   cat("Error: Binit should be m x m or NA (if not being fixed); Either your Binit is wrong or your flags are setting m to something you are not expecting.\n"); return(TRUE) } 
if(!(length(Binit)==1 & is.na(Binit[1])) & NA %in% Binit)  {
	   cat("Error: Binit cannot contain NAs. \n"); return(TRUE) } 
x00=as.array(x00)  #make sure dim attr is set
if(!(identical(floor(dim(x00)),floor(c(m,1))) | identical(floor(dim(x00)),floor(c(1,m))) | identical(floor(dim(x00)),floor(m)) | identical(floor(dim(x00)),floor(1)))) {
	cat("Error: x00 should be a scalar or a m x 1 or 1 x m vector; Either your x00 is wrong or your flags are setting m to something you are not expecting.\n"); return(TRUE) }
V00init=as.array(V00init)   #floor is necessary to get R to really make dim integer (grrr)
if(!(identical(floor(dim(V00init)),floor(1)) | identical(floor(dim(V00init)),floor(m)) | identical(floor(dim(V00init)),floor(c(1,1))) | identical(floor(dim(V00init)),floor(c(m,1))) 
	| identical(floor(dim(V00init)),floor(c(1,m))) | identical(floor(dim(V00init)),floor(c(m,m)))) ) {
	cat("Error: V00init should be length 1, m or m x m; Either your V00init is wrong or your flags are setting m to something you are not expecting.\n"); return(TRUE) }
	
#################### Check B.constraint (depends on Binit passing its checks, i.e. is either NA or m x m)
if(estInteractions==FALSE) B.constraint="identity" #backwards compatibility
if( (B.constraint %in% list("fixed", "identity", "diagonal", "unconstrained"))==FALSE ) {
	cat("Error: options for B.constraint are fixed, identity, unconstrained, or diagonal (passed as text in quotes) \n" ); return(TRUE) }
if( B.constraint == "fixed" & (length(Binit)==1 & is.na(Binit[1])) )  {
	cat("Error: If B.constraint==fixed then Binit must be set to what B should be held fixed at. \n"); return(TRUE) }
B.is.not.na=FALSE
if(length(Binit)==(m*m) & length(Binit)!=1) B.is.not.identity = (FALSE %in% (Binit==makediag(1,m)))
if(length(Binit)==1 & !is.na(Binit[1]))  B.is.not.na = TRUE
if(length(Binit)==1 & is.na(Binit[1]))  {B.is.not.na = FALSE; B.is.not.identity=TRUE}
if( B.constraint == "identity" & B.is.not.identity & B.is.not.na )  {
	   cat("Error: If B.constraint==identity then Binit must be set to NA or a m x m identity matrix. \n"); return(TRUE) }
if( B.constraint == "unconstrained" & !(silent) ) print("You are estimating an unconstrained B matrix.  Use caution.  As of July 2009, we are not sure the EM algorithm works very well for this case.")
return(FALSE)
}

###############################################################################################################################################
#   FailedArgumentCheck
###############################################################################################################################################
FailedArgumentCheck = function(y, whichPop, Uinit, Qinit, Ainit, Rinit, x00, V00init, max.iter, varcov.Q, varcov.R, U.groups, Q.groups, R.groups, U.bounds, logQ.bounds, logR.bounds, x00prior, estInteractions, MonteCarloInit, numInits, numInitSteps, tol, silent, debugit)
#This checks that the user didn't specify illegal values for the arguments
{
if(!is.numeric(Ainit) | any(is.na(Ainit)) )  {
	cat("Error: Ainit must be numeric and no NAs allowed.\n"); return(TRUE) }
if(!is.numeric(Uinit) | any(is.na(Uinit)) )  {
	cat("Error: Uinit must be numeric and no NAs allowed.\n"); return(TRUE) }
if(!is.numeric(Rinit) | !(all(diag(as.matrix(Rinit))>0)) | any(is.na(Rinit)) )  {
	cat("Error: Rinit must be numeric, >0 on the diagonal and no NAs allowed.\n"); return(TRUE) }
if(!is.numeric(Qinit) | !(all(diag(as.matrix(Qinit))>0)) | any(is.na(Qinit)) )  {
	cat("Error: Qinit must be numeric, >0 on the diagonal and no NAs allowed.\n"); return(TRUE) }
if(!is.numeric(V00init) | !(all(diag(as.matrix(V00init))>0)) | any(is.na(V00init)) )  {
	cat("Error: V00init must be numeric and positive on the diagonal.\n"); return(TRUE) }
if( !(x00prior %in% c(TRUE, FALSE) ) ) {
	cat("Error: x00prior must be TRUE or FALSE.  It tells whether to treat x00 as known distribution (prior) or fixed but unknown.\n"); return(TRUE) }

if( !(MonteCarloInit %in% c(TRUE, FALSE) ) ) {
	cat("Error: MonteCarloInit must be TRUE or FALSE.  It tells whether to do a random initial conditions start.\n"); return(TRUE) }
if( MonteCarloInit==TRUE & numInitSteps<1 ){
	cat("Error: When doing an MCInit (MonteCarloInit=TRUE), numInitSteps must be >= 1.\n"); return(TRUE) }
if( MonteCarloInit==TRUE & numInits<1 ){
	cat("Error: When doing an MCInit (MonteCarloInit=TRUE), numInits must be >= 1.\n"); return(TRUE) }
	
return(FALSE)
}

###############################################################################################################################################
#   diag helper functions
###############################################################################################################################################
takediag = function(x)
############# Function to take the diagonal; deals with R trying to think too much with diag()
{
if(length(x)==1) return(x)
else return(diag(x))
}

makediag = function(x,nrow=NA)
############# Function to make a diagonal matrix; deals with R trying to think too much with diag()
{
if(length(x)==1) 
{
if(is.na(nrow)) nrow=1
return(diag(c(x),nrow))
}
if((is.matrix(x) | is.array(x)))
   if(!(dim(x)[1]==1 | dim(x)[2]==1))  stop("Error in call to makediag; x is not vector")
if(is.na(nrow)) nrow=length(x)
return(diag(c(x),nrow))
}


#######################################################################################################
#   bootstrapMSSM
#######################################################################################################
bootstrapMSSM = function(nBoot, model, maxIter = 5000, tol=0.01, onlyAIC = NA, output="MLests", parametric=TRUE, silent=FALSE) {
  # all equation numbers refer to Chapter 6, Shumway & Stoffer
  # nBoot is the number of bootstrap replicates to do
  # model is a specified model (needs the structure and parameter elements of the list)
  # onlyAIC=FALSE : return all the parameter estimates from the bootstrap iterates; this is needed for bootstrap CIs
  # output tells what output to produce; this can be a vector
  #      bootdata : returns the simulated (or bootstrapped) data.
  #      MLests : return the ML parameter estimates from the bootstrapped data
  #      AICb, Param.AICb, Nonparam.AICb: AICb does Nonparam.AICb by default but switches to Param.AICb if there are missing values
  #      AICi returns AICi
  # parametric = TRUE forces the new data generated for output="bootdata" and/or "MLests" to use parametric bootstrapping
  # silent is a flag to indicate whether the progress bar should be printed
  
  ###### Check that package mvtnorm is installed and load it if it isn't already loaded
  if(require(mvtnorm)==FALSE) {
     str1 = "The bootstrapping functions require the package mvtnorm and you haven't installed it yet.\n"
     str2 = "Install the package before continuing by\n"
     str3 = "going to the Packages tab and selecting install (if working from the R GUI).\n"
     cat(paste(str1,str2,str3,sep=""))
     stop("the mvtnorm package is not installed")
  }
  ###### Error-checking on the arguments
  if(onlyAIC %in% c(TRUE,FALSE)) stop("onlyAIC doesn't do anything anymore; use output=c() to describe what output you want")
  if(FALSE %in% (output %in% c("bootdata","MLests","AICb","Hessian.AICb","Param.AICb","Nonparam.AICb","AICi"))) stop("incorrect bootstrapMSSM arg: output must be either bootdata, MLests, AICb, Param.AICb, Nonparam.AICb or AICi (in quotes)")
  if(!(parametric %in% c(TRUE,FALSE))) stop("incorrect bootstrapMSSM arg: parametric must be either TRUE or FALSE")
  if(!(silent %in% c(TRUE,FALSE))) stop("incorrect bootstrapMSSM arg: silent must be either TRUE or FALSE")
  if(!is.numeric(tol)) stop("incorrect bootstrapMSSM arg: tol must be a number")
  if(!is.numeric(maxIter)) stop("incorrect bootstrapMSSM arg: maxIter must be a number")
  if( ("bootdata" %in% output) | ("MLests" %in% output) ) onlyAIC = FALSE  # return parameter estimates
  ### 2-7-09 Temporary backwards compatibility because I added new term to model object
  if(is.null(model$estInteractions)) model$estInteractions=FALSE
  
  numStates = model$m      # number of subpopulations
  numYears = model$numYrs  # length of time series
  numSites = model$n       # number of time series

  sigma.Sqrt=NA; BKS=NA; eig=NA;  #dummy values in case parametric = TRUE
  errors = t(stdInnov(model$Sigma, model$Innov))  # standardized innovations; time going down columns
  # Error-checking and warnings regarding regular versus parametric bootstrapping
  if( ("AICb" %in% output | "Nonparam.AICb" %in% output ) & silent==FALSE & length(which(errors==0))!=0 ) {
        cat("Alert: Dataset has missing values. AICb calculation will use parametric bootstrapping instead of regular bootstrapping.\n")
        output = output[output != "Nonparam.AICb"]  # Pull out Nonparam.AICb if user passed that in
        output = output[output != "AICb"]  # Pull out AICb if user passed that in and replace with Param.AICb
        output=c(output,"Param.AICb") }
  if( ("AICb" %in% output ) &  parametric==TRUE  & silent==FALSE & length(which(errors==0))==0) {
        cat("Alert: AICb calculation uses nonparametric bootstrapping by default.  Pass in Param.AICb to use parametric bootstrapping.\n")
        output = output[output != "AICb"]  # Pull out AICb and replace with Nonparam.AICb
        output=c(output,"Nonparam.AICb") }
  if(length(which(errors==0))!=0) parametric = TRUE # only can do nonparametric bootstrapping if there are no missing values

  if(parametric == FALSE | ("Nonparam.AICb" %in% output) ){
    # Calculate the sqrt of sigma matrices, so they don't have to be computed 5000+ times
    sigma.Sqrt = array(0,dim=c(numSites, numSites, numYears))
    BKS = array(0,dim=c(numStates, numSites, numYears)) 
    for(i in 1:numYears) {
      eig = eigen(model$Sigma[,,i])   # calculate inv sqrt(sigma[1])
      sigma.Sqrt[,,i] = eig$vectors %*% makediag(sqrt(eig$values)) %*% t(eig$vectors)
      BKS[,,i] = model$B %*% model$Kt[,,i] %*% sigma.Sqrt[,,i]   # this is m x n
    }
  }
  if("Hessian.AICb" %in% output) {
  	if(silent==FALSE) cat("Computing the Hessian.  This might take awhile.\n")
  	modelhess = emHessian(model)
		paramsigma = solve(modelhess$Hessian)
		parammean = vectorizemssm(model)
		}
		 
  boot.params=NA; boot.states=NA; boot.data=NA;  logL=NA #dummy values 
  # these are the arrays for output
  if("bootdata" %in% output) boot.data = array(NA,dim=c(dim(as.matrix(model$data)),nBoot))
  if("MLests" %in% output)  {
    boot.params=array(NA,dim=c(length(vectorizemssm(model)),nBoot))
    boot.states = array(NA,dim=c(dim(as.matrix(model$states)),nBoot))
  }
  
  logL.thetaStar=0; logL.Hessian.AICb=0; logL.Param.AICb=0; logL.Nonparam.AICb=0; logL.AICi=0; logL2.AICi=0;  
  Hessian.AICb=NA; Param.AICb=NA; Nonparam.AICb=NA; AICi=NA; eqn8=NA; AICi.approx=NA;  #so R knows about these
  drawProgressBar = FALSE #If the time library is not installed, no prog bar
  if("time" %in% installed.packages()[,1]){ #then try to install time
  if(require(time)==TRUE & !silent) { #then we can draw a progress bar
    prev = progressBar()
    drawProgressBar = TRUE
    }   }
    
  for(i in 1:nBoot) {
    if( ("MLests" %in% output) | ("Param.AICb" %in% output & parametric==TRUE) | ("Nonparam.AICb" %in% output & parametric==FALSE) | "bootdata" %in% output)
        newData = makeNewData(errors, model, model$Sigma, sigma.Sqrt, BKS, parametric.bootstrapping=parametric )  # make new data

    if("bootdata" %in% output) boot.data[,,i] = as.matrix(newData)
    
    if( ("MLests" %in% output) | ("Param.AICb" %in% output & parametric==TRUE) | ("Nonparam.AICb" %in% output & parametric==FALSE) ) {   
      # estimate the MLEs (theta*) using the bootstrapped data 
      boot.model = KalmanEM(newData, varcov.Q=model$varcov.Q, varcov.R=model$varcov.R, whichPop=model$whichPop,U.groups=model$U.groups, Q.groups=model$Q.groups,R.groups=model$R.groups,Uinit=model$U,Ainit=model$A,Qinit=model$Q,Rinit=model$R, x00=model$x00, estInteractions=model$estInteractions, tol=tol,silent=TRUE,max.iter=maxIter)

      if("MLests" %in% output ) {
  		  # store the parameter estimates from the bootstraped data fitting
  		  boot.params[,i]=vectorizemssm(boot.model)
        boot.states[,,i] = as.matrix(boot.model$states)  }     
			
       #************AICb calculations      
       # obtain the logL of theta* under the original data
       if( "Param.AICb" %in% output & parametric==TRUE ) {
          tmp.model=list( kf.data=model$kf.data, include=model$include, U=boot.model$U, Q=boot.model$Q, R=boot.model$R, A=boot.model$A, B=boot.model$B, 
			       x00=boot.model$x00, V00=boot.model$V00 )  
          logL.Param.AICb[i] = Kfilter(tmp.model)$loglik }
       if( "Nonparam.AICb" %in% output & parametric==FALSE) {
          tmp.model=list( kf.data=model$kf.data, include=model$include, U=boot.model$U, Q=boot.model$Q, R=boot.model$R, A=boot.model$A, B=boot.model$B, 
			       x00=boot.model$x00, V00=boot.model$V00 )  
		     logL.Nonparam.AICb[i] = Kfilter(tmp.model)$loglik   }
     } #end if MLests or AICb in output

     #in this case, newData (generated above was not parametrically bootstrapped, so have to create new data
     if( ("Param.AICb" %in% output & parametric==FALSE) ) {
        newData = makeNewData(errors, model, model$Sigma, sigma.Sqrt, BKS, parametric.bootstrapping=TRUE )  # make new data
        boot.model = KalmanEM(newData, varcov.Q=model$varcov.Q, varcov.R=model$varcov.R, whichPop=model$whichPop,U.groups=model$U.groups, Q.groups=model$Q.groups,R.groups=model$R.groups,Uinit=model$U,Ainit=model$A,Qinit=model$Q,Rinit=model$R, x00=model$x00, estInteractions=model$estInteractions, tol=tol,silent=TRUE,max.iter=maxIter)
        tmp.model=list( kf.data=model$kf.data, include=model$include, U=boot.model$U, Q=boot.model$Q, R=boot.model$R, A=boot.model$A, B=boot.model$B, 
           x00=boot.model$x00, V00=boot.model$V00 )  
        logL.Param.AICb[i] = Kfilter(tmp.model)$loglik
        }
     #in this case, newData (generated above was parametrically bootstrapped, so have to create new nonparametrically bootstrapped data
     if( ("Nonparam.AICb" %in% output & parametric==TRUE) ) {
        newData = makeNewData(errors, model, model$Sigma, sigma.Sqrt, BKS, parametric.bootstrapping=FALSE )  # make new data
        boot.model = KalmanEM(newData, varcov.Q=model$varcov.Q, varcov.R=model$varcov.R, whichPop=model$whichPop,U.groups=model$U.groups, Q.groups=model$Q.groups,R.groups=model$R.groups,Uinit=model$U,Ainit=model$A,Qinit=model$Q,Rinit=model$R, x00=model$x00, estInteractions=model$estInteractions, tol=tol,silent=TRUE,max.iter=maxIter)
        tmp.model=list( kf.data=model$kf.data, include=model$include, U=boot.model$U, Q=boot.model$Q, R=boot.model$R, A=boot.model$A, B=boot.model$B, 
           x00=boot.model$x00, V00=boot.model$V00 )  
        logL.Nonparam.AICb[i] = Kfilter(tmp.model)$loglik
        }
     if( "Hessian.AICb" %in% output ) {
        hess.params=rmvnorm(1, mean=parammean, sigma=paramsigma, method="chol")
        tmp.model=model
        tmp.model=vectorizemssm(tmp.model,paramvector=hess.params)
        kftry = try(Kfilter(tmp.model),silent=TRUE)
        logL.Hessian.AICb[i] = ifelse(!inherits(kftry, "try-error"),kftry$loglik,NA)
        }
          
    #************AICi calculations   
    if("AICi" %in% output){  
      simData = rmvnorm(model$numYrs, mean = rep(0, nrow(as.matrix(model$R))), sigma = diag(nrow(as.matrix(model$R))), method="chol")  #make 0,1    
      Astart = rep(0,numSites)
      Rstart = rep(0.5,numSites)
      Qstart = rep(0.5,numStates)
      Ustart = rep(0, numStates)
      x00start = rep(0, numStates)
      # estimate the MLEs (theta*) using the 0,1 sim data 
      boot.model = KalmanEM(simData, varcov.Q=model$varcov.Q, varcov.R=model$varcov.R, whichPop=model$whichPop,U.groups=model$U.groups, Q.groups=model$Q.groups,R.groups=model$R.groups,Uinit=Ustart,Ainit=Astart,Qinit=Qstart,Rinit=Rstart, x00=x00start, estInteractions=model$estInteractions, tol=tol,silent=T,max.iter=maxIter)     
      # store the logL of theta* under the sim data
      logL.AICi[i] = boot.model$loglike
      # generate more sim data
      simData2 = rmvnorm(model$numYrs, mean = rep(0, nrow(as.matrix(model$R))), sigma = diag(nrow(as.matrix(model$R))), method="chol")  #make 0,1 data
      # obtain the logL of theta* under the sim.star data
      tmp.model=list( kf.data=t(simData2), include=boot.model$include, U=boot.model$U, Q=boot.model$Q, R=boot.model$R, A=boot.model$A, B=boot.model$B, 
         x00=boot.model$x00, V00=boot.model$V00 )  
      Kf.star = Kfilter(tmp.model)     
      logL2.AICi[i] = Kf.star$loglik 

      #B&C eqn 8 
      tmpt = 0  # just a temporary holder
      for(t in 1:numYears) {
          include.Y <- boot.model$include[,,t]  
          dim(include.Y) <- c(numSites,numStates)  #stops R from changes the dimensions when m=1  
          include.Y2 <- (include.Y%*%array(1,dim=c(numStates,1))) # make an include.Y for A that is nx1
          tmp = include.Y%*%Kf.star$xtt1[,t]+include.Y2*boot.model$A
          Ftinv <- chol2inv(chol(boot.model$Sigma[,,t]))
          tmpt = tmpt + t(tmp) %*% Ftinv %*% tmp  + sum(diag(Ftinv))
          }
      eqn8[i] = tmpt
      }        #end AICi section

    # Step 6: Draw the progress bar if silent=F and time library is installed
    if(drawProgressBar) prev <- progressBar(i/nBoot,prev)
  } # end of for loop for nBoots
  
  if( "Param.AICb" %in% output ) # Note that the AICb calc generally uses bootstrapped data not parametric data like AICi
    Param.AICb = -4*(sum(logL.Param.AICb))/nBoot + 2*model$loglike   # -2*model$loglike + 2*(1/N)*(-2)*sum(boot$Yloglike-model$loglike) 
  if( "Hessian.AICb" %in% output ) # Note that the AICb calc generally uses bootstrapped data not parametric data like AICi
    Hessian.AICb = -4*mean(logL.Hessian.AICb,na.rm=TRUE) + 2*model$loglike   # -2*model$loglike + 2*(1/N)*(-2)*sum(boot$Yloglike-model$loglike) 
  if( "Nonparam.AICb" %in% output ) # Note that the AICb calc generally uses bootstrapped data not parametric data like AICi
    Nonparam.AICb = -4*(sum(logL.Nonparam.AICb))/nBoot + 2*model$loglike   # -2*model$loglike + 2*(1/N)*(-2)*sum(boot$Yloglike-model$loglike) 
  if( "AICi" %in% output )  {
    AICi = -2*model$loglike + (-2)*(1/nBoot)*sum(logL2.AICi - logL.AICi)
    Tconstant = numYears*numSites
    AICi.approx = -2 * model$loglike + (-1 * Tconstant + (1/nBoot)*sum( eqn8 ))
    }
    
  return(list(Hessian.AICb=Hessian.AICb, Nonparam.AICb = Nonparam.AICb, Param.AICb = Param.AICb, AICi=AICi, AICi.approx=AICi.approx, logL.Hessian.AICb=logL.Hessian.AICb, logL.Param.AICb=logL.Param.AICb, logL.Nonparam.AICb=logL.Nonparam.AICb, logL.AICi = logL.AICi, boot.params=boot.params, boot.states=boot.states, boot.data=boot.data))
}

###############################################################################################################################################
#   makeNewData
###############################################################################################################################################
makeNewData = function(Errors, model, Sigma, sigmaSqrt, B.Kt.sqrtSigma, parametric.bootstrapping=FALSE, return.state.process=FALSE ) {
   # Errors = std innovations
   newData = matrix(-99, model$numYrs, model$n)
   newStates = matrix(-99, model$numYrs+1, model$m) # States = years x subpops
   numMissingVals = length(which(Errors==0))

   if(numMissingVals == 0 & parametric.bootstrapping==FALSE) {
      # Then the bootstrap algorithm proceeds
      # Stoffer & Wall suggest not sampling from innovations 1-3 (not much data)
      minIndx = ifelse(model$numYrs > 5, 3, 1)
      samp = sample(seq(minIndx+1, model$numYrs), size = (model$numYrs-minIndx), replace = TRUE)
      e = as.matrix(Errors)
      e[1:minIndx,] = as.matrix(Errors[1:minIndx,])
      e[(minIndx+1):model$numYrs,] = as.matrix(Errors[samp,])
      # newStates is a[] in the writeup by EH
      newStates[1,] = model$x00
      #newStates[2,] = model$B %*% model$x00 + model$U + model$B %*% model$Kt[,,1] %*% sigmaSqrt[,,1] %*% e[1,]
      newStates[2,] = model$B %*% newStates[1,] + model$U + B.Kt.sqrtSigma[,,1] %*% e[1,]
      newData[1,] = newStates[1,][model$whichPop] + model$A + sigmaSqrt[,,1] %*% e[1,]  # eqn 1.7 in writeup by EH
      for(i in 2:model$numYrs) {  # Deal with years 2:T, use sigma, Kt, B, U, A
         newStates[i+1,] = model$B %*% newStates[i,] + model$U + B.Kt.sqrtSigma[,,i] %*% e[i,]
         newData[i,] = newStates[i,][model$whichPop] + model$A + sigmaSqrt[,,i] %*% e[i,]
      }
   }
   if(numMissingVals != 0 | parametric.bootstrapping==TRUE) {
   ###### Check that package mvtnorm is installed and load it if it isn't already loaded
   if(require(mvtnorm)==FALSE) {
      str1 = "The makeNewData functions require the package mvtnorm when doing parametric bootstrapping and you haven't installed it yet.\n"
      str2 = "Install the package before continuing by\n"
      str3 = "going to the Packages tab and selecting install (if working from the R GUI).\n"
      cat(paste(str1,str2,str3,sep=""))
      stop("the mvtnorm package is not installed")
      }
      newStates[1,] = model$x00 # t = 0
      # create a matrix of RVs for observation error
      obs.error = rmvnorm(model$numYrs, mean = rep(0, nrow(as.matrix(model$R))), sigma = model$R, method="chol")
      pro.error = rmvnorm(model$numYrs, mean = rep(0, nrow(model$Q)), sigma = model$Q, method="chol")
      for(i in 2:(model$numYrs+1)) {
         newStates[i,] = model$B %*% newStates[i-1,] + model$U + pro.error[i-1,]
         newData[i-1,] = model$Z %*% newStates[i,] + model$A + obs.error[i-1,]
      }
      newData[which(Errors==0)]=-99 #newData is going to be used in the KalmanEM function which expects -99 for missing values
   }
if(return.state.process==TRUE) return(newStates)
else return(newData)

}

###############################################################################################################################################
#   stdInnov
###############################################################################################################################################
stdInnov = function(SIGMA, INNOV) {
   # This function added by EW Nov 3, 2008
   # This function is only called once by bootstrapMSSM()
   # SIGMA is covariance matrix, E are original innovations
   numYrs = dim(INNOV)[2]
   numSites = dim(INNOV)[1]
   SI = matrix(0, numSites, numYrs)
   for(i in 1:numYrs) {
      a = SIGMA[,,i]
      a.eig = eigen(a)
      a.sqrt = a.eig$vectors %*% makediag((sqrt(a.eig$values))) %*% solve(a.eig$vectors)
      SI[,i] = chol2inv(chol(a.sqrt)) %*% INNOV[,i]  # eqn 1, p359 S&S
   }
   SI[which(INNOV == 0)]=0   # make sure things that should be 0 are 0
   return(SI)
}

#######################################################################################################
#   vectorizemssm
#######################################################################################################
vectorizemssm = function(mssm.model, paramvector=NA) {
#This helper function
#if paramvector=NA) returns a vector version of all the estimated parameters (for use in say optim) from a mssm model
#if paramvector is passed in) returns a mssm object with the parameters fixed by paramvector

#attach would be risky here since user might have one of these variables in their workspace
U=mssm.model$U; Q=mssm.model$Q; R=mssm.model$R; A=mssm.model$A; B=mssm.model$B; x00=mssm.model$x00;
Q.groups=mssm.model$Q.groups; R.groups=mssm.model$R.groups; U.groups=mssm.model$U.groups
varcov.Q=mssm.model$varcov.Q; varcov.R=mssm.model$varcov.R
n=mssm.model$n; m=mssm.model$m; whichPop=mssm.model$whichPop
if(length(paramvector)==1 & is.na(paramvector[1])) {
paramvec.names=c()
	#x00
		if(mssm.model$x00prior==FALSE) x00.params=as.vector(x00) else x00.params=c()
		paramvec.names=c(paramvec.names,rep("x00",length(x00.params)))
	#B
		if(mssm.model$estInteractions==TRUE) stop("Error in emHessian: Hessian when B estimated is not coded in yet")
	#U
		U.numGroups = max(U.groups)
		ZU = matrix(0,m,U.numGroups)   # matrix to allow shared growth rates
		for(i in 1:U.numGroups) ZU[which(U.groups == i),i] <- 1
		U.params = as.vector(t(ZU)%*%U) / colSums(ZU)
		paramvec.names=c(paramvec.names,rep("U",length(U.params)))
	#Q
		Q.numGroups = max(Q.groups)
		ZQ = matrix(0,m,Q.numGroups)  # matrix to allow shared process variances
		for(i in 1:Q.numGroups) ZQ[which(Q.groups==i),i] = 1
		if(varcov.Q=="diagonal") Q.params = as.vector(t(ZQ)%*%array(takediag(Q),dim=c(m,1))) / colSums(ZQ)
		if(varcov.Q=="equalvarcov") Q.params = c(Q[1,1],Q[1,2])
		if(varcov.Q=="unconstrained") Q.params = as.vector(Q)
		if(varcov.Q=="equalvarcov.groups") stop("varcov.Q=equalvarcov.groups is not coded in yet")
		paramvec.names=c(paramvec.names,rep("Q",length(Q.params)))
	#R
		R.numGroups = max(R.groups)
		ZR <- matrix(0,n,R.numGroups)  # matrix to allow shared measurement errs
		for(i in 1:R.numGroups) ZR[which(R.groups==i),i] <- 1
		if(varcov.R=="diagonal") R.params = as.vector(t(ZR)%*%array(takediag(R),dim=c(n,1))) / colSums(ZR)
		if(varcov.R=="equalvarcov") R.params = c(R[1,1],R[1,2])
		if(varcov.R=="unconstrained") R.params=as.vector(R)
		paramvec.names=c(paramvec.names,rep("R",length(R.params)))
	#A
    A.params=A[which(A!=0)]
    paramvec.names=c(paramvec.names,rep("A",length(A.params)))
	paramvector = c(x00.params, U.params, Q.params, R.params, A.params)
	names(paramvector)=paramvec.names
	return(paramvector)
	} # end if paramvector==NA
	
else {
  names(paramvector)=NULL  #don't want any new names lurking about
  m=mssm.model$m; n=mssm.model$n; whichPop=mssm.model$whichPop
	U.numGroups = max(mssm.model$U.groups)
	Q.numGroups = max(mssm.model$Q.groups)
	R.numGroups = max(mssm.model$R.groups)
	ZU = matrix(0,m,U.numGroups); for(i in 1:U.numGroups) ZU[which(U.groups == i),i] = 1
	ZQ = matrix(0,m,Q.numGroups); for(i in 1:Q.numGroups) ZQ[which(Q.groups==i),i] = 1
	ZR = matrix(0,n,R.numGroups); for(i in 1:R.numGroups) ZR[which(R.groups==i),i] = 1

	#x00
	if(mssm.model$x00prior==FALSE) {
		mssm.model$x00=array(paramvector[1:m],dim=c(m,1))
		paramvector=paramvector[(m+1):length(paramvector)]
		}
	#U
	mssm.model$U = ZU%*%array(paramvector[1:U.numGroups],dim=c(U.numGroups,1))
	paramvector=paramvector[(U.numGroups+1):length(paramvector)]
	#Q
	Q.numGroups = dim(ZQ)[2]
	if(mssm.model$varcov.Q=="diagonal") {
			mssm.model$Q = makediag(ZQ%*%array(paramvector[1:Q.numGroups],dim=c(Q.numGroups,1)))
			paramvector=paramvector[(Q.numGroups+1):length(paramvector)] }
	if(mssm.model$varcov.Q=="unconstrained") {
			mssm.model$Q = array(paramvector[1:(m*m)],dim=c(m,m))
			paramvector=paramvector[(m*m+1):length(paramvector)] }
	if(mssm.model$varcov.Q=="equalvarcov") {
			mssm.model$Q = diag(paramvector[1],m,m)+array(paramvector[2],dim=c(m,m))-diag(paramvector[2],m,m)
			paramvector=paramvector[3:length(paramvector)] }
	#R
	R.numGroups = dim(ZR)[2]
	if(mssm.model$varcov.R=="diagonal") {
		mssm.model$R = makediag(ZR%*%array(paramvector[1:R.numGroups],dim=c(R.numGroups,1)))
		paramvector=paramvector[(R.numGroups+1):length(paramvector)] }
	if(mssm.model$varcov.R=="unconstrained") {
		mssm.model$R = array(paramvector,dim(n,n))
		paramvector=paramvector[(n*n+1):length(paramvector)] }
	if(mssm.model$varcov.R=="equalvarcov") {
		mssm.model$R = diag(paramvector[1],n,n)+array(paramvector[2],dim=c(n,n))-diag(paramvector[2],n,n)
		paramvector=paramvector[3:length(paramvector)] }
	#A
	A=array(0,dim=c(n,1))
	tableA <- as.numeric(table(whichPop))  #figure out which state processes have more than 1 observation time series
  numSubsWith2Sites = which(tableA > 1)
  if(length(numSubsWith2Sites) > 0) {
		for(subpops in 1:length(numSubsWith2Sites)) {
 			this.sub = numSubsWith2Sites[subpops]
   		this.n = tableA[this.sub]
      this.index = which(whichPop==this.sub)
      As = c(0,paramvector[1:(this.n-1)])
      paramvector=paramvector[this.n:length(paramvector)]
      A[this.index] = As
		}
	mssm.model$A=A
	}
  return(mssm.model)
} #end if paramvector arg is passed in
}

#######################################################################################################
#   emHessian
#######################################################################################################
emHessian = function(mssm.model) {
#this function expects a mssm model as output by KalmanEM
if(require(nlme)==FALSE) {
  str1 = "The emHessian function requires the package nlme and you haven't installed it yet.\n"
	str2 = "Install the package before continuing by\n"
	str3 = "going to the Packages tab and selecting install (if working from the R GUI).\n"
      cat(paste(str1,str2,str3,sep=""))
	stop("Error: missing required nlme package")
}
paramvector=vectorizemssm(mssm.model)

kfNLL=function(paramvec, model){
	new.model=vectorizemssm(model, paramvector=paramvec)
	kf = Kfilter(new.model)
  return(-kf$loglik)
  }
  
#Hessian and gradient
emhess=fdHess(paramvector, function(paramvector, mssm.model) kfNLL(paramvector, mssm.model), mssm.model)
mssm.model$Hessian=emhess$Hessian
mssm.model$gradient=emhess$gradient
mssm.model$parSigma =  try(solve(mssm.model$Hessian))
if(inherits(mssm.model$parSigma, "try-error")) mssm.model$parSigma=NULL
mssm.model$parMean = paramvector

return(mssm.model)
}

#######################################################################################################
#   computeCIs
#######################################################################################################
computeCIs = function(mssm.model, method="hessian", alpha=0.05) {
if(method=="hessian")  {
    #if the model has no Hessian specified, then run emHessian to get it
    if(is.null(mssm.model$Hessian)) mssm.model=emHessian(mssm.model)
    #standard errors
    stderr=try(sqrt(diag(solve(mssm.model$Hessian))))      #invert the Hessian; wrap in a try() in case it fails
    if(inherits(stderr, "try-error")) {
        stderr=rep(NA,length(paramvector))
        warning("Hessian cannot be inverted; stderr = NA is being returned")
        }
    stderr.model=vectorizemssm(mssm.model,paramvector=stderr)
    if(mssm.model$x00prior==TRUE) {stderr.model$x00=NA; stderr.model$V00=NA}
    if(mssm.model$x00prior==FALSE) stderr.model$V00=NA
    if(mssm.model$estInteractions==FALSE) stderr.model$B=NA
    if(mssm.model$estInteractions==TRUE) stderr.model$B="Hessian when B estimated is not coded in yet"
    mssm.model$A.stderr=stderr.model$A
    mssm.model$B.stderr=stderr.model$B
    mssm.model$U.stderr=stderr.model$U
    mssm.model$Q.stderr=stderr.model$Q
    mssm.model$R.stderr=stderr.model$R
    mssm.model$x00.stderr=stderr.model$x00
    mssm.model$V00.stderr=stderr.model$V00
    mssm.model$A.CI.upper=mssm.model$A + qnorm(1-alpha/2)*stderr.model$A
    mssm.model$A.CI.lower=mssm.model$A - qnorm(1-alpha/2)*stderr.model$A
    mssm.model$B.CI.upper=mssm.model$A + qnorm(1-alpha/2)*stderr.model$B
    mssm.model$B.CI.lower=mssm.model$A - qnorm(1-alpha/2)*stderr.model$B
    mssm.model$U.CI.upper=mssm.model$U + qnorm(1-alpha/2)*stderr.model$U
    mssm.model$U.CI.lower=mssm.model$U - qnorm(1-alpha/2)*stderr.model$U
    mssm.model$Q.CI.upper=mssm.model$Q + qnorm(1-alpha/2)*stderr.model$Q
    mssm.model$Q.CI.lower=mssm.model$Q - qnorm(1-alpha/2)*stderr.model$Q
    mssm.model$R.CI.upper=mssm.model$R + qnorm(1-alpha/2)*stderr.model$R
    mssm.model$R.CI.lower=mssm.model$R - qnorm(1-alpha/2)*stderr.model$R
    mssm.model$x00.CI.upper=mssm.model$x00 + qnorm(1-alpha/2)*stderr.model$x00
    mssm.model$x00.CI.lower=mssm.model$x00 - qnorm(1-alpha/2)*stderr.model$x00
    mssm.model$V00.CI.upper=mssm.model$V00 + qnorm(1-alpha/2)*stderr.model$V00
    mssm.model$V00.CI.lower=mssm.model$V00 - qnorm(1-alpha/2)*stderr.model$V00
}
else stop("Only method Hessian programmed currently")
return(mssm.model)
}
Sculpin 0.2 | xhtml | problems or comments? | report bugs