Changes made in revision 1 of KalmanEM.r
--- /tmp/cvx_2rE3Wz 2012-02-07 19:13:08.000000000 -0800
+++ /tmp/cvx_DFOLYn 2012-02-07 19:13:08.000000000 -0800
@@ -1,4 +1,4 @@
-KalmanEM = function(y, whichPop=NA, Uinit=NA, Qinit=NA, Ainit=NA, Rinit=NA, x00=NA, V00init=NA, max.iter=100, varcov.Q="unconstrained", varcov.R="diagonal", U.groups=NA, Q.groups=NA, R.groups=NA, U.bounds=c(-1,1), logQ.bounds = c(log(0.0001),log(1.0)), logR.bounds = c(log(0.0001),log(1.0)), estInteractions=FALSE, MonteCarloInit = FALSE, numInits = 500, numInitSteps = 10, tol=0.01, silent=FALSE)
+KalmanEM = function(y, whichPop=NA, Uinit=NA, Qinit=NA, Ainit=NA, Rinit=NA, x00=NA, V00init=NA, max.iter=5000, varcov.Q="unconstrained", varcov.R="diagonal", U.groups=NA, Q.groups=NA, R.groups=NA, U.bounds=c(-1,1), logQ.bounds = c(log(1.0e-05),log(1.0)), logR.bounds = c(log(1.0e-05),log(1.0)), estInteractions=FALSE, MonteCarloInit = FALSE, numInits = 500, numInitSteps = 10, tol=0.01, silent=FALSE)
{
# Code written by Eric Ward (EW) and Eli Holmes (EH)
@@ -8,7 +8,8 @@
# July 7, 08 EW added arguments whichPop (nx1), R.groups (nx1), U.groups (mx1) and Q.groups (mx1)
# July 12, 08 EH cleaned up error checking
# July 19, 08 EW added a few catches to trap cases where A is never estimated
-
+# Aug 06, 08 EW added names, so that strings may be passed in for groups / sites / species (names also display on output)
+# Nov 08 EW added bootstrapping functions
############## Things that the function needs passed in
###########################################################
# y is the LOG TRANSFORMED data with -99 for MISSING VALS; each column is a separate observation time series; time is down the rox
@@ -25,7 +26,7 @@
############# Check that package MASS is installed and load it if it isn't already loaded
###########################################################
if(require(MASS)==FALSE) {
- str1 = "The KalmanEM function requires the package MASS and you haven't installed it yet.\n"
+ str1 = "The KalmanEM function requires the package MASS and you haven't installed it yet.\n"
str2 = "Install the package before continuing by\n"
str3 = "going to the Packages tab and selecting install (if working from the R GUI).\n"
cat(paste(str1,str2,str3,sep=""))
@@ -34,20 +35,29 @@
############# Set the values for whichPop, Q.groups, U.groups, R.groups and inits if they were not passed in
###########################################################
-n = dim(y)[1]
-default = length(whichPop)==1 & is.na(whichPop[1]); if(default) whichPop = seq(1,n)
-m = max(whichPop)
-default = length(Q.groups) == 1 & is.na(Q.groups[1]); if(default) Q.groups = seq(1,m)
-default = length(U.groups) == 1 & is.na(U.groups[1]); if(default) U.groups = seq(1,m)
-default = length(R.groups) == 1 & is.na(R.groups[1]); if(default) R.groups = seq(1,n)
-default = length(Uinit)==1 & is.na(Uinit[1]); if(default) Uinit = rep(0,m)
-default = length(Qinit)==1 & is.na(Qinit[1]); if(default) Qinit = rep(0.05,m)
-default = length(Ainit)==1 & is.na(Ainit[1]); if(default) Ainit = rep(0,n)
-default = length(Rinit)==1 & is.na(Rinit[1]); if(default) Rinit = rep(0.05,n)
-default = length(x00)==1 & is.na(x00[1]);
-tmp = y; tmp[tmp==-99] = NA;
-if(default) x00 = rep(mean(tmp,na.rm=T), m) #this use the mean over all the observations for all observation ts
-default = length(V00init)==1 & is.na(V00init[1]); if(default) V00init = rep(1,m);
+ U.names=U.groups
+ if(is.numeric(U.groups)==FALSE) U.groups = as.numeric(as.factor(U.groups))
+ R.names=R.groups
+ if(is.numeric(R.groups)==FALSE) R.groups = as.numeric(as.factor(R.groups))
+ Q.names=Q.groups
+ if(is.numeric(Q.groups)==FALSE) Q.groups = as.numeric(as.factor(Q.groups))
+ n = dim(y)[1]
+ default = length(whichPop)==1 & is.na(whichPop[1]); if(default) whichPop = seq(1,n)
+ m = max(whichPop)
+ default = length(Q.groups) == 1 & is.na(Q.groups[1]); if(default) Q.groups = seq(1,m)
+ default = length(U.groups) == 1 & is.na(U.groups[1]); if(default) U.groups = seq(1,m)
+ default = length(R.groups) == 1 & is.na(R.groups[1]); if(default) R.groups = seq(1,n)
+ default = length(Uinit)==1 & is.na(Uinit[1]); if(default) Uinit = rep(0,m)
+ default = length(Qinit)==1 & is.na(Qinit[1]); if(default) Qinit = rep(0.05,m)
+ default = length(Ainit)==1 & is.na(Ainit[1]); if(default) Ainit = rep(0,n)
+ default = length(Rinit)==1 & is.na(Rinit[1]); if(default) Rinit = rep(0.05,n)
+ default = length(x00)==1 & is.na(x00[1]);
+ tmp = y; tmp[tmp==-99] = NA;
+ if(default) x00 = rep(mean(tmp,na.rm=T), m) #this use the mean over all the observations for all observation ts
+ default = length(V00init)==1 & is.na(V00init[1]); if(default) V00init = rep(1,m);
+ if(length(which(is.na(U.names)==TRUE))) U.names=U.groups
+ if(length(which(is.na(Q.names)==TRUE))) Q.names=Q.groups
+ if(length(which(is.na(R.names)==TRUE))) R.names=R.groups
############ Error check to make sure user did not specify an illegal or inconsistent model
###########################################################
@@ -78,8 +88,7 @@
for(i in 1:R.numGroups) ZR[which(R.groups==i),i] <- 1
# Print out the model structure as a check for the user
-if(!silent)
- {
+if(!silent) {
modelStruc = paste("Model Structure is\n","m: ",m," state process(es)\n","n: ",n," observation time series\n",sep="")
modelStruc = paste(modelStruc,"whichPop: Observation time series assigned to state processes as ",paste(whichPop,collapse=" "),"\n",sep="")
if(m > 1) modelStruc = paste(modelStruc,"U.groups: State process growth rates assigned to groups as ",paste(U.groups,collapse=" "),"\n",sep="")
@@ -100,7 +109,7 @@
modelStruc = paste(modelStruc,"R.groups: Observation variances assigned to groups as ",paste(R.groups,collapse=" "),"\n",sep="")
}
cat(modelStruc)
- }
+}
#############Set up some matrices used for handling missing values
# include is a n x m matrix to handle missing values; 1 matrix for each year;
@@ -109,20 +118,12 @@
include <- array(0, dim=c(n,m,numYrs))
for(i in 1:numYrs)
include[,,i] <- makediag(ifelse(y[,i]!=-99,1,0),nrow=n)%*%Z #include = 1 when data exists, 0 otherwise
+miss <- ifelse(y!=-99,0,1) # 0=observed, 1=missing
+notmiss <- ifelse(y!=-99,1,0) # 1=observed, 0=missing
y[which(y == -99)] <- 0 # Clear out -99s in data; include has been set up so that these values will be ignored
-miss <- ifelse(abs(y)>0,0,1) # 0=observed, 1=missing
-notmiss <- ifelse(abs(y)>0,1,0) # 1=observed, 0=missing
-#############Set up some counters and containers that are used during each iteration
+#############Create record of each variable over all iterations
###########################################################
-LL <- NA;
-converged <- 0
-previous_loglik <- -Inf
-num_iter <- 0
-iter <- 0
-loglike.old <- -10000
-cvg <- 1+tol
-# Create record of each variable over all iterations
iter.record <- list(U = array(0,dim=c(m,max.iter)), Q = array(0,dim=c(m,m,max.iter)),
A = array(0,dim=c(n,max.iter)), R = array(0,dim=c(n,n,max.iter)), loglike = array(0, dim=c(1, max.iter)) )
@@ -145,18 +146,22 @@
y.obs[which(y.obs==0)] = NA
initY1[i] = lm(y.obs~seq(1:numYrs))$fitted.values[1]
}
+bestLL = -1.0e10 #this is only used if MonteCarloInit == TRUE
-bestLL = -1.0e10
-# EW: the first numInits iterations of this loop represent the initialization phase,
-# the last iteration is the final estimation
if(MonteCarloInit == FALSE) {
# use values passed in by user
numInits = 0
numInitSteps = 0
}
+# EW: the first numInits iterations of this loop represent the MCinitialization phase (if MCInit=T),
+# the last iteration is the final estimation (if MCInit=F, then goes straight to the last)
for(loop in 1:(numInits + 1)) {
+ #reset for each loop
+ loglike.old <- -10000
+ cvg <- 1+tol
- numIter = ifelse(loop <= numInits,numInitSteps,max.iter) # this determines # of iterations, depends on phase
+ # this determines # of iterations, if in MCInit, then iter = numInitSteps otherwise max.iter
+ numIter = ifelse(loop <= numInits,numInitSteps,max.iter)
if(MonteCarloInit == TRUE) { # draw random initial conditions / parameter values
U = array(runif(m,U.bounds[1],U.bounds[2]),dim=c(m,1))
Q = makediag(exp(runif(m,logQ.bounds[1],logQ.bounds[2])),nrow=m)
@@ -164,9 +169,10 @@
init.x00 = array(runif(m,0.75*initY1,1.25*initY1),dim=c(m,1))
x00 = init.x00
}
+ # Some initial start draws of the params are so awful that loglike will go down. We want to discard these cases.
validDraw = TRUE
- # If we've exceeded the initialization phase, do the final estimation
+ # If we've exceeded the initialization phase, use the best params for starting final estimation
if(loop > numInits & MonteCarloInit == TRUE) {
Q = best.Q
R = best.R
@@ -177,14 +183,17 @@
for(iter in 1:numIter) {
kf <- Kfilter(m,n,numYrs,U,Q,A,R,B,x00,V00,include,y) #call kalman filter to get x(t)|y(1:T) etc estimates
loglike.new = kf$loglik
+
if(iter>1 && is.finite(loglike.old) == T && is.finite(loglike.new) == T) cvg = loglike.new - loglike.old ## cvg is improvement in likelihood
- if(cvg<0) {
- #cat(paste("Warning: at iter=",iter," likelihood decreased; loglike.new - loglike.old = ", cvg,"\n", sep=""))
- validDraw = FALSE
- }
- #if(cvg >= 0 & cvg < tol) break
- if(cvg >= 0 & cvg < tol & loop > numInits) break # only break if on the last run
- loglike.old = loglike.new
+ # If loglike goes down, set validDraw to FALSE
+ if(cvg<0) validDraw = FALSE
+ else validDraw = TRUE
+
+ # if converged, break out
+ if(cvg >= 0 & cvg < tol) break
+
+ # Store loglike for comparison to new one after parameters are updated
+ loglike.old = loglike.new
################# M STEP Re-estimate U,Q,A,R,B,X00,V00 via ML given x(t) estimate
################
@@ -246,21 +255,21 @@
R_unconstrained = sum1/numYrs #this provides the estimate of the R matrix with diagonal and non-diagonal elements
#The inv functions don't work well when the diagonal elements get lower than the machine tol, so put a min on those
#this could break if R_unconstrained happens to equal an integer
- if(length(R_unconstrained)==1)
- R_unconstrained <- max(sqrt(.Machine$double.eps),R_unconstrained)
- else
- diag(R_unconstrained)[which(takediag(R_unconstrained) 1) #this will get the average to use for the shared covariance value
- Roffdiag <- (1/(n*(n-1)))*(sum(R_unconstrained)-sum(takediag(R_unconstrained)))
- Roffdiag <- Roffdiag*(matrix(1,n,n)-makediag(1,n))
- R <- Rdiag + Roffdiag
+ Rdiag <- mean(takediag(R_unconstrained))*makediag(1,n) #average
+ Roffdiag <- 0
+ if(n > 1) #this will get the average to use for the shared covariance value
+ Roffdiag <- (1/(n*(n-1)))*(sum(R_unconstrained)-sum(takediag(R_unconstrained)))
+ Roffdiag <- Roffdiag*(matrix(1,n,n)-makediag(1,n))
+ R <- Rdiag + Roffdiag
} #if R is equal var and equal cov
################
@@ -290,20 +299,20 @@
# Calculate the interaction matrix, if requested
if(estInteractions==TRUE) B = S10%*%chol2inv(chol(S00))
#The inv functions don't work well when the diagonal elements get lower than the machine tol, so put a min on those
- if(length(Q_unconstrained)==1)
- Q_unconstrained <- max(sqrt(.Machine$double.eps),Q_unconstrained)
- else
- diag(Q_unconstrained)[which(diag(Q_unconstrained) 1)
- Qoffdiag <- (1/(m*(m-1)))*(sum(Q_unconstrained)-sum(takediag(Q_unconstrained)))
- Qoffdiag <- Qoffdiag*(matrix(1,m,m)-makediag(1,m))
- Q <- Qdiag + Qoffdiag;
+ Qdiag <- mean(takediag(Q_unconstrained))*makediag(1,m) #average
+ Qoffdiag <- 0
+ if(m > 1)
+ Qoffdiag <- (1/(m*(m-1)))*(sum(Q_unconstrained)-sum(takediag(Q_unconstrained)))
+ Qoffdiag <- Qoffdiag*(matrix(1,m,m)-makediag(1,m))
+ Q <- Qdiag + Qoffdiag;
} #if equal variances and covariances
################
@@ -321,10 +330,11 @@
iter.record$A[,iter] <- A
iter.record$loglike[iter] <- loglike.new
} # end inner iter loop
-
- # After this iteration, check whether the likelihood is the best observed
- if(validDraw == TRUE & loglike.new > bestLL) {
- # update the best parameter estimates
+
+ # If doing a MCInit, check whether the likelihood is the best observed
+ # Only use bootstrap param draws where loglike did not go down during numInitSteps
+ if(validDraw == TRUE & MonteCarloInit == TRUE & loglike.new > bestLL) {
+ # update the best initial parameter estimates
best.Q = iter.record$Q[,,1]
best.R = iter.record$R[,,1]
best.U = iter.record$U[,1]
@@ -349,7 +359,17 @@
se = matrix(0,nrow=m,ncol=numYrs)
for(i in 1:numYrs) se[,i] = t(sqrt(takediag(kf$VtT[,,i])))
}
-return(list(states=kf$xtT,CI_states.lower = (kf$xtT - 1.96*se), CI_states.upper = (kf$xtT + 1.96*se), U=U,Q=Q,A=A,R=R,B=B,x00=x00,V00=V00,V0T=kf$V0T,iter=iter,loglike=loglike.new,iter.record=iter.record))
+
+names(U) = U.names
+
+if(is.matrix(Q)==F) Q = as.matrix(Q) # this line added to make sure that a scalar isn't passed to rmvnorm - 01/20/09
+
+rownames(Q) = Q.names
+colnames(Q) = Q.names
+rownames(R) = R.names
+colnames(R) = R.names
+states = kf$xtT
+return(list(states=states,CI_states.lower = (kf$xtT - 1.96*se), CI_states.upper = (kf$xtT + 1.96*se), A=A,B=B,Q=Q,R=R,U=U,Z=Z,x00=x00,V00=V00,V0T=kf$V0T,Kt=kf$Kt,Innov=kf$Innov,Sigma=kf$Sigma,m=m, n=n, numYrs=numYrs,iter=iter,loglike=loglike.new,iter.record=iter.record, whichPop=whichPop, R.groups=R.groups, Q.groups=Q.groups, U.groups=U.groups, varcov.R=varcov.R, varcov.Q=varcov.Q, data=t(y), include=include))
} #End of the KalmanEM function
@@ -369,6 +389,7 @@
Vtt1 <- array(0,dim=c(m,m,numYrs)) # Analagous to S&S Ptt1, E[Vtt1|y(1:t)]
xtt <- array(0,dim=c(m,numYrs)) # E[x(t) | Y(t)]
xtt1 <- array(0,dim=c(m,numYrs)) # E[x(t) | Y(t-1)]
+ innov <- array(0,dim=c(n,numYrs)) # these are innovations, parentheses in 6.21
vt <- array(0,dim=c(n,numYrs)) # used for likelihood, vt equivalent to epsilon, eqn 6.62
Ft <- array(0,dim=c(n,n,numYrs)) # used for likelihood, Ft equivalent to sigma matrix eqn 6.62
# these are for the backwards, Rausch, filter
@@ -376,7 +397,8 @@
J <- array(0,dim=c(m,m,numYrs)) # see eqn 6.49
Vtt1T <- array(0,dim=c(m,m,numYrs)) # E[Vtt1|y(1:T)]
xtT <- array(0,dim=c(m,numYrs)) # E[x | y(1:T)]
-
+ Kt <- array(0, dim=c(m,n,numYrs)) # 3D matrix of Kalman gain, EW added 11/14/08
+
#forward pass gets you E[x(t) given y(1:t)]
xtt1[,1] <- Phi%*%c(x00) + U # eqn 6.19
Vtt1[,,1] <- Phi%*%V00%*%t(Phi) + Q # eqn 6.20
@@ -386,10 +408,11 @@
siginv = include.Y%*%Vtt1[,,1]%*%t(include.Y)+R # bracketed piece of eqn 6.23
siginv=chol2inv(chol(siginv)) # now siginv is sig[[i]]^{-1}
siginv = (t(siginv)+siginv)/2 #Vtt1 happens to be symmetric since it is V00+Q; although in general E(xt t(xt1)) is not symmetric
- Kt <- Vtt1[,,1]%*%t(include.Y) %*% siginv; #broke siginv to impose symmetry, eqn 6.23
+ Kt[,,1] <- Vtt1[,,1]%*%t(include.Y) %*% siginv; #broke siginv to impose symmetry, eqn 6.23
- xtt[,1] <- xtt1[,1] + Kt%*%(y[,1] - (include.Y%*%xtt1[,1] + include.Y2*A)); # eqn 6.21
- Vtt[,,1] <- Vtt1[,,1]-Kt%*%include.Y%*%Vtt1[,,1]; # eqn 6.22, detail after 6.28
+ innov[,1] <- y[,1] - (include.Y%*%xtt1[,1] + include.Y2*A)
+ xtt[,1] <- xtt1[,1] + Kt[,,1]%*%innov[,1]; # eqn 6.21
+ Vtt[,,1] <- Vtt1[,,1]-Kt[,,1]%*%include.Y%*%Vtt1[,,1]; # eqn 6.22, detail after 6.28
Vtt[,,1] <- (Vtt[,,1]+t(Vtt[,,1]))/2; #Vtt must be symmetric
# the missing values will contribute 0.0 for this calc
vt[,1] <- y[,1]-(include.Y%*%xtt1[,1]+include.Y2*A) #3.3.4; need to hold on to this for loglik calc
@@ -403,16 +426,17 @@
include.Y <- include[,,t]
dim(include.Y) <- c(n,m) #stops R from changes the dimensions when m=1
include.Y2 <- (include.Y%*%array(1,dim=c(m,1))) # I'm futzing with the include.Y for A to make it nx1
- Kt <- Vtt1[,,t]%*%t(include.Y) %*% chol2inv(chol(include.Y%*%Vtt1[,,t]%*%t(include.Y)+R)) # eqn 6.23
- xtt[,t] <- xtt1[,t] + Kt%*%(y[,t] - (include.Y%*%xtt1[,t] + include.Y2*A)) # eqn 6.21
- Vtt[,,t] <- Vtt1[,,t]-Kt%*%include.Y%*%Vtt1[,,t] # eqn 6.22, detail after 6.28
+ Kt[,,t] <- Vtt1[,,t]%*%t(include.Y) %*% chol2inv(chol(include.Y%*%Vtt1[,,t]%*%t(include.Y)+R)) # eqn 6.23
+ innov[,t] <- y[,t] - (include.Y%*%xtt1[,t] + include.Y2*A)
+ xtt[,t] <- xtt1[,t] + Kt[,,t]%*%innov[,t] # eqn 6.21
+ Vtt[,,t] <- Vtt1[,,t]-Kt[,,t]%*%include.Y%*%Vtt1[,,t] # eqn 6.22, detail after 6.28
Vtt[,,t] <- (Vtt[,,t]+t(Vtt[,,t]))/2 #to ensure its symetric
# missing values will contribute nothing
vt[,t] <- y[,t]-(include.Y%*%xtt1[,t]+include.Y2*A) #need to hold on to this for loglik calc
Ft[,,t] <- include.Y%*%Vtt1[,,t]%*%t(include.Y)+R #need to hold on to this for loglik calc
Ft[,,t] <- (Ft[,,t]+t(Ft[,,t]))/2 #to ensure its symetric
}
- KT <- Kt;
+ KT <- Kt[,,t];
#indexing is 0 to T for the backwards (Rausch) recursions
#backward pass gets you E[x(t)|y(1:T)] from E[x(t)|y(1:t)]
@@ -454,9 +478,10 @@
Ftinv <- (Ftinv +t(Ftinv))/2 #enforce symmetry; Ft is symmetric
loglik <- loglik - (1/2)%*%t(vt[,i]) %*% Ftinv %*% vt[,i] - (1/2)*log(detFt);
}
- return(list(xtT = xtT, VtT = VtT, Vtt1T = Vtt1T, x0T = x0T, V0T = V0T, loglik = loglik, Vtt = Vtt, Vtt1 = Vtt1, J=J, Kt=Kt))
+ return(list(xtT = xtT, VtT = VtT, Vtt1T = Vtt1T, x0T = x0T, V0T = V0T, loglik = loglik, Vtt = Vtt, Vtt1 = Vtt1, J=J, Kt=Kt, xtt1 = xtt1, Innov=innov, Sigma=Ft))
}
+
FailedErrorCheck = function(y, whichPop, U.groups, R.groups, Q.groups, varcov.Q, varcov.R,Uinit,Qinit,Ainit,Rinit,x00) {
############# Error check to make sure user isn't trying to set up an illegal or inconsistent model structure
@@ -537,3 +562,142 @@
if(is.na(nrow)) nrow=length(x)
return(diag(c(x),nrow))
}
+
+
+#######################################################################################################
+# These functions are all for bootstrapping
+#######################################################################################################
+bootstrapMSSM = function(nBoot, model, maxIter = 5000, tol=0.01, onlyAIC = TRUE, silent=FALSE) {
+ # This function updated by EW and EH
+ # all equation numbers refer to Chapter 6, Shumway & Stoffer
+ # onlyAIC is a debug feature to allows all the boot params and boot data iterates to be returned
+ # silent is a flag to indicate whether the progress bar should be printed
+
+ ###### Check that package mvtnorm is installed and load it if it isn't already loaded
+ if(require(mvtnorm)==FALSE) {
+ str1 = "The bootstrapping functions require the package mvtnorm and you haven't installed it yet.\n"
+ str2 = "Install the package before continuing by\n"
+ str3 = "going to the Packages tab and selecting install (if working from the R GUI).\n"
+ cat(paste(str1,str2,str3,sep=""))
+ stop("the mvtnorm package is not installed")
+ }
+
+ errors = t(stdInnov(model$Sigma, model$Innov)) # standardize innovations
+ numStates = model$m # number of subpopulations
+ numYears = model$numYrs # length of time series
+ numSites = model$n # number of time series
+
+ # Calculate the sqrt of sigma matrices, so they don't have to be computed 5000+ times
+ sigma.Sqrt = array(0,dim=c(numSites, numSites, numYears))
+ BKS = array(0,dim=c(numStates, numSites, numYears))
+ for(i in 1:numYears) {
+ eig = eigen(model$Sigma[,,i]) # calculate inv sqrt(sigma[1])
+ # EW added as.matrix 01/26/09
+ sigma.Sqrt[,,i] = eig$vectors %*% makediag((sqrt(eig$values))) %*% t(eig$vectors)
+ BKS[,,i] = model$B %*% model$Kt[,,i] %*% sigma.Sqrt[,,i] # this is m x n
+ }
+ U=NA; Q=NA; R=NA; A=NA; B=NA; states=NA; boot.data=NA;
+ # these are the arrays for output
+ if(onlyAIC == FALSE) {
+ boot.data = array(0,dim=c(dim(as.matrix(model$data)),nBoot))
+ U = array(0,dim=c(dim(as.matrix(model$U)),nBoot))
+ Q = array(0,dim=c(dim(as.matrix(model$Q)),nBoot))
+ R = array(0,dim=c(dim(as.matrix(model$R)),nBoot))
+ A = array(0,dim=c(dim(as.matrix(model$A)),nBoot))
+ B = array(0,dim=c(dim(as.matrix(model$B)),nBoot))
+ states = array(0,dim=c(dim(as.matrix(model$states)),nBoot))
+ }
+
+ logL.thetaStar = 0
+ logL = 0;
+ drawProgressBar = FALSE #If not time library, no prog bar
+ if(require(time)==TRUE) { #then we can draw a progress bar
+ prev = progressBar()
+ drawProgressBar = TRUE
+ }
+ for(i in 1:nBoot) {
+ newData = makeNewData(errors, model, model$Sigma, sigma.Sqrt, BKS) # make new data
+ # Take the ML output and make sure it's in the right form to be passed back in
+ Rstart = diag(model$R)
+ Qstart = diag(model$Q)
+ # Step 2: estimate the MLEs, theta*
+ boot.model = KalmanEM(newData, varcov.Q=model$varcov.Q, varcov.R=model$varcov.R, whichPop=model$whichPop,U.groups=model$U.groups, Q.groups=model$Q.groups,R.groups=model$R.groups,Uinit=model$U,Ainit=model$A,Qinit=Qstart,Rinit=Rstart,tol=tol,silent=T,max.iter=maxIter)
+ if(onlyAIC == FALSE) {
+ # Step 3: store the parameter estimates from the bootstraped data fitting
+ boot.data[,,i] = as.matrix(newData)
+ U[,,i] = as.matrix(boot.model$U)
+ Q[,,i] = as.matrix(boot.model$Q)
+ R[,,i] = as.matrix(boot.model$R)
+ A[,,i] = as.matrix(boot.model$A)
+ B[,,i] = as.matrix(boot.model$B)
+ states[,,i] = as.matrix(boot.model$states)
+ }
+ logL[i] = Kfilter(numStates, numSites, numYears, boot.model$U, boot.model$Q, boot.model$A, boot.model$R, boot.model$B, boot.model$x00, boot.model$V00, boot.model$include, t(model$data))$loglik
+ if(drawProgressBar) prev <- progressBar(i/nBoot,prev)
+ }
+
+ AICb = -4*(sum(logL))/nBoot + 2*model$loglike
+ return(list(AICb = AICb, logL.star=logL, U = U, Q = Q, R = R, A = A, B = B, states = states, boot.data=boot.data))
+}
+
+makeNewData = function(Errors, model, Sigma, sigmaSqrt, B.Kt.sqrtSigma) {
+ # This function updated by EW and EH Dec 02, 2008
+ # Errors = std innovations
+ origData = model$data
+
+ newData = matrix(-99, model$numYrs, model$n)
+ newStates = matrix(-99, model$numYrs+1, model$m) # States = years x subpops
+ numMissingVals = length(which(Errors==0))
+
+ if(numMissingVals == 0) {
+ # Then the bootstrap algorithm proceeds
+ # Stoffer & Wall suggest not sampling from innovations 1-3 (not much data)
+ minIndx = ifelse(model$numYrs > 5, 4, 1)
+ samp = sample(seq(minIndx, model$numYrs), size = model$numYrs, replace = T)
+ # EW added as.matrix() on 01/26/09
+ e = as.matrix(Errors[samp,])
+ # newStates is a[] in the writeup by EH
+ newStates[1,] = model$x00
+ #newStates[2,] = model$B %*% model$x00 + model$U + model$B %*% model$Kt[,,1] %*% sigmaSqrt[,,1] %*% e[1,]
+ newStates[2,] = model$B %*% model$x00 + model$U + B.Kt.sqrtSigma[,,1] %*% e[1,]
+ newData[1,] = model$x00[model$whichPop] + model$A + sigmaSqrt[,,1] %*% e[1,] # eqn 1.7 in writeup by EH
+ for(i in 2:model$numYrs) { # Deal with years 2:T, use sigma, Kt, B, U, A
+ newStates[i+1,] = model$B %*% newStates[i,] + model$U + B.Kt.sqrtSigma[,,i] %*% e[i,]
+ newData[i,] = newStates[i,][model$whichPop] + model$A + sigmaSqrt[,,i] %*% e[i,]
+ }
+ newData[which(e == 0)]=-99 #newData is going to be used in the KalmanEM function which expects -99 for missing values
+ # return the new data
+ }
+ if(numMissingVals != 0) {
+ newStates[1,] = model$x00 # t = 0
+ # create a matrix of RVs for observation error
+ obs.error = rmvnorm(model$numYrs, mean = rep(0, nrow(as.matrix(model$R))), sigma = model$R, method="chol")
+ pro.error = rmvnorm(model$numYrs, mean = rep(0, nrow(model$Q)), sigma = model$Q, method="chol")
+ for(i in 2:(model$numYrs+1)) {
+ newStates[i,] = model$B %*% newStates[i-1,] + model$U + pro.error[i-1,]
+ newData[i-1,] = model$Z %*% newStates[i,] + model$A + obs.error[i-1,]
+ }
+ newData[which(Errors==0)]=-99
+ }
+
+ return(newData)
+}
+
+stdInnov = function(SIGMA, INNOV) {
+ # This function added by EW Nov 3, 2008
+ # This function is only called once by bootstrapMSSM()
+ # SIGMA is covariance matrix, E are original innovations
+ numYrs = dim(INNOV)[2]
+ numSites = dim(INNOV)[1]
+ SI = matrix(0, numSites, numYrs)
+ for(i in 1:numYrs) {
+ a = SIGMA[,,i]
+ a.eig = eigen(a)
+ # EW added as.matrix() on 01/26/09 for single trajectory model
+ a.sqrt = a.eig$vectors %*% makediag((sqrt(a.eig$values))) %*% solve(a.eig$vectors)
+ SI[,i] = chol2inv(chol(a.sqrt)) %*% INNOV[,i] # eqn 1, p359 S&S
+ }
+ SI[which(INNOV == 0)]=0 # make sure things that should be 0 are 0
+ return(SI)
+}
+