Changes made in revision 2 of KalmanEM.R
--- /tmp/cvx_B1PnIw 2012-02-07 19:32:05.000000000 -0800
+++ /tmp/cvx_VTesYg 2012-02-07 19:32:05.000000000 -0800
@@ -1,25 +1,27 @@
-KalmanEM = function(y, whichPop=NA, Uinit=NA, Qinit=NA, Ainit=NA, Rinit=NA, x00=NA, V00init=NA, max.iter=5000, varcov.Q="unconstrained", varcov.R="diagonal", U.groups=NA, Q.groups=NA, R.groups=NA, U.bounds=c(-1,1), logQ.bounds = c(log(1.0e-05),log(1.0)), logR.bounds = c(log(1.0e-05),log(1.0)), estInteractions=FALSE, MonteCarloInit = FALSE, numInits = 500, numInitSteps = 10, tol=0.01, silent=FALSE)
-{
-
+KalmanEM = function(y, whichPop=NA, Binit=NA, Uinit=0, Qinit=0.05, Ainit=0, Rinit=0.05, x00=NA, V00init=10, max.iter=5000, varcov.Q="unconstrained", varcov.R="diagonal", B.constraint="identity", U.constraint="unconstrained", A.groups=NA, U.groups=NA, Q.groups=NA, R.groups=NA, x00.groups=NA, B.groups=NA, U.bounds=c(-1,1), logQ.bounds = c(log(1.0e-05),log(1.0)), logR.bounds = c(log(1.0e-05),log(1.0)), x00prior=FALSE, estInteractions=FALSE, MonteCarloInit = FALSE, numInits = 500, numInitSteps = 10, tol=0.01, silent=FALSE, debugit=FALSE)
+{ #comment on x00prior=FALSE; x00prior=TRUE case is not programmed in yet. In that case x00 fixed, not estimated.
+#x00prior=FALSE means that we treat x00 as an unknown fixed parameter that is estimated. So P_0=V00=0, but we don't want to set
+#V00=0 because then the EM algorithm won't converge. We want V00 big, and then at the end, return the likelihood with V00=0
# Code written by Eric Ward (EW) and Eli Holmes (EH)
-# adapted from Matlab code by Eli Holmes/Rich Hinrichsen and R code from Shumway and Stoffer (2006)
-# A description of the algorithm (for the univariate case) is in "An EM algorithm.pdf"
-# July 4, 08, EH added estimation A bias term
-# July 7, 08 EW added arguments whichPop (nx1), R.groups (nx1), U.groups (mx1) and Q.groups (mx1)
-# July 12, 08 EH cleaned up error checking
-# July 19, 08 EW added a few catches to trap cases where A is never estimated
-# Aug 06, 08 EW added names, so that strings may be passed in for groups / sites / species (names also display on output)
-# Nov 08 EW added bootstrapping functions
+# The Kfilter function was based was adapted from Matlab code written by Eli Holmes and Rich Hinrichsen
+# This code estimates structural log-linear autoregressive models
+# The model needs to specified in a log-linear form; for population count data, this means log-transforming the data
+# Will deal with missing values (marked as -99), but this slows computation
+
+
+
############## Things that the function needs passed in
###########################################################
# y is the LOG TRANSFORMED data with -99 for MISSING VALS; each column is a separate observation time series; time is down the rox
-y <- t(y) #for consistency, all data files are assumed to have time going down rows; this algorithm wants time across cols
+orig.data = y
+y <- t(y) #for consistency with how ecological data files are usually formatted, all data files are assumed to have time going down rows; this algorithm wants time across cols
# Initial conditions for the algorithm are passed in as vectors Uinit,Qinit,Rinit,x00
# max.iter is the number of iterations to do
# tol is the tolerence for checking if convergence has happened; lower if convergence happens in just a few iterations
# whichPop, a 1xn vector assigning observation time series to state processes
# varcov.Q and varcov.R specify the structures allowed for Q & R: "unconstrained", "diagonal" or "equalvarcov"
# R.groups is a nx1 vector specifying which observation variances are shared across different state processes; default is all unique
+# A.groups is a nx1 vector specifying which observation biases are shared across different state processes; default is all unique
# U.groups is a mx1 vector specifying which u's are shared across different state processes; default is all unique
# Q.groups is a mx1 vector specifying which process variances are shared across different state processes; default is all unique
@@ -32,60 +34,111 @@
cat(paste(str1,str2,str3,sep=""))
stop("missing MASS package")
}
-
-############# Set the values for whichPop, Q.groups, U.groups, R.groups and inits if they were not passed in
+############ Check that the user didn't pass in any illegal arguments
+if(FailedArgumentCheck(y, whichPop, Uinit, Qinit, Ainit, Rinit, x00, V00init, max.iter, varcov.Q, varcov.R, U.groups, Q.groups, R.groups, U.bounds, logQ.bounds, logR.bounds, x00prior, estInteractions, MonteCarloInit, numInits, numInitSteps, tol, silent, debugit))
+ stop("invalid argument values passed into KalmanEM()")
+
+############# Give names for the rows and columns of y. numYrs is actually the number of time steps
+###########################################################
+ n = dim(y)[1] #R does something strange with dim; floor makes sure it returns an integer
+ numYrs <- dim(y)[2]
+ #m = max(whichPop) and is defined later (after whichPop is defined in the case that whichPop=NA was passed in)
+
+############# Let the user put in names for groups; convert group names to factors
+############# If NA then just returns back to NA
###########################################################
- U.names=U.groups
+ site.names=ifelse(is.null(names(y)), seq(1,n), names(y) )
+ x00.names=paste(":", x00.groups, sep="") #this will get reset if x00.groups=NA
+ if(is.numeric(x00.groups)==FALSE) x00.groups = as.numeric(as.factor(x00.groups))
+ U.names=paste(":", U.groups, sep="") #this will get reset if U.groups=NA
if(is.numeric(U.groups)==FALSE) U.groups = as.numeric(as.factor(U.groups))
- R.names=R.groups
+ A.names=paste(":", A.groups, sep="") #this will get reset if A.groups=NA
+ if(is.numeric(A.groups)==FALSE) A.groups = as.numeric(as.factor(A.groups))
+ R.names=paste(":", R.groups, sep="") #this will get reset if R.groups=NA
if(is.numeric(R.groups)==FALSE) R.groups = as.numeric(as.factor(R.groups))
- Q.names=Q.groups
+ Q.names=paste(":", Q.groups, sep="") #this will get reset if Q.groups=NA
if(is.numeric(Q.groups)==FALSE) Q.groups = as.numeric(as.factor(Q.groups))
- n = dim(y)[1]
- default = length(whichPop)==1 & is.na(whichPop[1]); if(default) whichPop = seq(1,n)
- m = max(whichPop)
- default = length(Q.groups) == 1 & is.na(Q.groups[1]); if(default) Q.groups = seq(1,m)
- default = length(U.groups) == 1 & is.na(U.groups[1]); if(default) U.groups = seq(1,m)
- default = length(R.groups) == 1 & is.na(R.groups[1]); if(default) R.groups = seq(1,n)
- default = length(Uinit)==1 & is.na(Uinit[1]); if(default) Uinit = rep(0,m)
- default = length(Qinit)==1 & is.na(Qinit[1]); if(default) Qinit = rep(0.05,m)
- default = length(Ainit)==1 & is.na(Ainit[1]); if(default) Ainit = rep(0,n)
- default = length(Rinit)==1 & is.na(Rinit[1]); if(default) Rinit = rep(0.05,n)
- default = length(x00)==1 & is.na(x00[1]);
- tmp = y; tmp[tmp==-99] = NA;
- if(default) x00 = rep(mean(tmp,na.rm=T), m) #this use the mean over all the observations for all observation ts
- default = length(V00init)==1 & is.na(V00init[1]); if(default) V00init = rep(1,m);
- if(length(which(is.na(U.names)==TRUE))) U.names=U.groups
- if(length(which(is.na(Q.names)==TRUE))) Q.names=Q.groups
- if(length(which(is.na(R.names)==TRUE))) R.names=R.groups
+ B.names=paste(":", B.groups, sep="") #this will get reset if Q.groups=NA
+ if(is.numeric(B.groups)==FALSE) B.groups = as.numeric(as.factor(Q.groups))
+ whichPop.names=paste(":", whichPop, sep="") #this will get reset if whichPop=NA
+ if(is.numeric(whichPop)==FALSE) whichPop = as.numeric(as.factor(whichPop))
+
+############# Set the values for whichPop, Q.groups, U.groups, R.groups if they were not passed in
+###########################################################
+ default = length(whichPop)==1 & is.na(whichPop[1]); if(default) {whichPop = seq(1,n); whichPop.names=paste(":",whichPop,sep="") }
+ m = max(whichPop)
+ default = length(Q.groups) == 1 & is.na(Q.groups[1])
+ if(default & varcov.Q != "equalvarcov") {Q.groups = seq(1,m); Q.names=paste(":",Q.groups,sep="")}
+ if(default & varcov.Q == "equalvarcov") {Q.groups = rep(1,m); Q.names=paste(":",Q.groups,sep="") }
+ default = length(U.groups) == 1 & is.na(U.groups[1]); if(default) {U.groups = seq(1,m); U.names=paste(":",U.groups,sep="")}
+ #x00 and A groups are special because one would not normally set them, so only give them a name if user explicitly set them
+ default = length(x00.groups) == 1 & is.na(x00.groups[1]); if(default) {x00.groups = seq(1,m); x00.names=""}
+ default = length(A.groups) == 1 & is.na(A.groups[1]); if(default) {A.groups = seq(1,n); A.names=""}
+ default = length(R.groups) == 1 & is.na(R.groups[1])
+ if(default & varcov.R != "equalvarcov") {R.groups = seq(1,n); R.names=paste(":",R.groups,sep="")}
+ if(default & varcov.R == "equalvarcov") {R.groups = rep(1,n); R.names=paste(":",R.groups,sep="") }
+ default = length(B.groups) == 1 & is.na(B.groups[1])
+ if(default) {B.groups = seq(1,m); B.names=paste(":",B.groups,sep="")}
-############ Error check to make sure user did not specify an illegal or inconsistent model
+############ Error check that the model is legal or consistent
###########################################################
-if(FailedErrorCheck(y, whichPop, U.groups, R.groups, Q.groups, varcov.Q, varcov.R,Uinit,Qinit,Ainit,Rinit,x00))
+if(FailedErrorCheck(y, whichPop, U.groups, x00.groups, A.groups, R.groups, Q.groups, B.groups, varcov.Q, varcov.R, B.constraint, Uinit, Qinit, Ainit, Rinit, Binit, x00, V00init,estInteractions))
stop("Model is illegal or not interally consistent")
-############# Set up the model structure: how many state processes and which observation time series go to which state process
+############# Set the initial conditions and make them the correct dimensions
###########################################################
-n <- dim(y)[1] #the user passes in y with time down the rows, but at top of function this is transposed because this func wants time in the cols
-m = max(whichPop) # this is the number of state processes
-numYrs <- dim(y)[2]
+#make sure all passed in initial conditions have the dim attr set
+Uinit=as.array(Uinit); Qinit=as.array(Qinit); Ainit=as.array(Ainit); Rinit=as.array(Rinit); x00=as.array(x00); as.array(V00init)
+
+Ainit=array(Ainit,dim=c(n,1))
+Uinit=array(Uinit,dim=c(m,1))
+if(length(Qinit)==1 | length(Qinit)==m) Qinit = diag(c(Qinit),m,m) #else Qinit is m x m so is ok
+if(length(Rinit)==1 | length(Rinit)==n) Rinit = diag(c(Rinit),n,n) #else Rinit is n x n so is ok
+#if x00==NA then do a linear regression to get init values
+x00.is.NA = length(x00)==1 & is.na(x00[1]);
+if(x00.is.NA) {
+ x00init = array(0,dim=c(m,1))
+ # do a simple linear regression, estimating the predicted value using the y that x will be scaled to (in case multiple y per x)
+ for(i in 1:m) {
+ this.index = min(which(whichPop==i))
+ y.obs = y[this.index,]
+ y.obs[which(y.obs==-99)] = NA
+ x00init[i] = lm(y.obs~seq(1:numYrs))$fitted.values[1]
+ }
+ }
+else x00init=array(x00,dim=c(m,1))
+if(length(V00init)==1 | length(V00init)==m) V00init = diag(c(V00init),m,m) #otherwise V00init is m x m
+#if "fixed", then user passed in a B that they want used
+#if(B.constraint=="fixed") B will stay fixed at Binit
+if(B.constraint!="fixed") Binit = makediag(1,m)
-# Construct the Z matrix based on n, m, and whichPop; this relates observation time series to state processes
+############# Construct the various design matrices
+# Z relates observation time series to state processes
Z = matrix(0,n,m) # this will be filled 0s and 1s...if each site is unique, Z = diag(n)
for(i in 1:m) Z[which(whichPop==i),i] <- 1
# Set up the grouping structure among different state processes and the different observation time se
+A.numGroups <- max(A.groups)
U.numGroups <- max(U.groups)
+x00.numGroups <- max(x00.groups)
Q.numGroups <- max(Q.groups)
R.numGroups <- max(R.groups)
+B.numGroups <- max(B.groups)
# Create matrices of 0s and 1s for each, to later be used for multiplication
+ZA = matrix(0,n,A.numGroups) # matrix to allow shared As
+for(i in 1:A.numGroups) ZA[which(A.groups == i),i] <- 1
ZU = matrix(0,m,U.numGroups) # matrix to allow shared growth rates
-for(i in 1:U.numGroups) ZU[which(U.groups == i),i] <- 1
+for(i in 1:U.numGroups) ZU[which(U.groups == i),i] <- 1
+Zx00 = matrix(0,m,x00.numGroups) # matrix to allow shared x00 values
+for(i in 1:x00.numGroups) Zx00[which(x00.groups == i),i] <- 1
ZQ <- matrix(0,m,Q.numGroups) # matrix to allow shared process variances
for(i in 1:Q.numGroups) ZQ[which(Q.groups==i),i] <- 1
ZR <- matrix(0,n,R.numGroups) # matrix to allow shared measurement errs
for(i in 1:R.numGroups) ZR[which(R.groups==i),i] <- 1
+ZB <- matrix(0,m,B.numGroups) # matrix to allow shared B terms along the diagonal
+for(i in 1:B.numGroups) ZB[which(B.groups==i),i] <- 1
+
# Print out the model structure as a check for the user
if(!silent) {
@@ -94,20 +147,35 @@
if(m > 1) modelStruc = paste(modelStruc,"U.groups: State process growth rates assigned to groups as ",paste(U.groups,collapse=" "),"\n",sep="")
if(m > 1 & varcov.Q == "unconstrained")
modelStruc = paste(modelStruc,"Q: Process errors have an unconstrained m x m variance-covariance matrix\n",sep="")
- if(m > 1 & varcov.Q == "equalvarcov")
+ if(m > 1 & varcov.Q == "equalvarcov"){
+ modelStruc = paste(modelStruc,"Q: Process errors have an m x m variance-covariance matrix with equal variances and equal covariances\n",sep="")
+ modelStruc = paste(modelStruc,"Q.groups argument (if passed in) was ignored\n",sep="")
+ }
+ if(m > 1 & varcov.Q == "equalvarcov.groups"){
modelStruc = paste(modelStruc,"Q: Process errors have an m x m variance-covariance matrix with equal variances and equal covariances\n",sep="")
+ modelStruc = paste(modelStruc,"in groups (covariance set to zero outside groups)\n",sep="")
+ }
if(m > 1 & varcov.Q == "diagonal"){
modelStruc = paste(modelStruc,"Q: Process errors are uncorrelated and have a diagonal var-cov matrix.\n",sep="")
modelStruc = paste(modelStruc,"Q.groups: State process variances assigned to groups as ",paste(Q.groups,collapse=" "),"\n",sep="")
}
if(varcov.R == "unconstrained")
modelStruc = paste(modelStruc,"R: Observation errors have an unconstrained n x n variance-covariance matrix\n",sep="")
- if(varcov.R == "equalvarcov")
+ if(varcov.R == "equalvarcov"){
modelStruc = paste(modelStruc,"R: Observation errors have an n x n variance-covariance matrix with equal variances and equal covariances\n",sep="")
+ modelStruc = paste(modelStruc,"R.groups argument (if passed in) was ignored\n",sep="")
+ }
if(varcov.R == "diagonal"){
modelStruc = paste(modelStruc,"R: Observation errors are uncorrelated and have a diagonal var-cov matrix.\n",sep="")
modelStruc = paste(modelStruc,"R.groups: Observation variances assigned to groups as ",paste(R.groups,collapse=" "),"\n",sep="")
}
+ if(x00prior == FALSE){
+ modelStruc = paste(modelStruc,"x00 is treated as fixed but unknown (estimated). V00=0 (but set larger for the EM algorithm. See help file).\n",sep="")
+ }
+ else modelStruc = paste(modelStruc,"x00 and V00 specify the (fixed) prior. x00 is not estimated.\n",sep="")
+ if(V00init[1]<0.5){
+ modelStruc = paste(modelStruc,"Warning: V00init is small. This will converge slowly (and not at all if V00init=0). See notes in help file.\n",sep="")
+ }
cat(modelStruc)
}
@@ -122,39 +190,38 @@
notmiss <- ifelse(y!=-99,1,0) # 1=observed, 0=missing
y[which(y == -99)] <- 0 # Clear out -99s in data; include has been set up so that these values will be ignored
-#############Create record of each variable over all iterations
-###########################################################
-iter.record <- list(U = array(0,dim=c(m,max.iter)), Q = array(0,dim=c(m,m,max.iter)),
- A = array(0,dim=c(n,max.iter)), R = array(0,dim=c(n,n,max.iter)), loglike = array(0, dim=c(1, max.iter)) )
-
#############Start the EM algorithm which is going to keep updating until
# it converges
###########################################################
#Set the first (or initial) values for the parameters
-U= array(Uinit, dim=c(m,1))
-Q=makediag(Qinit,nrow=m)
-A= array(Ainit,dim=c(n,1))
-R=makediag(Rinit,nrow=n) #again we should be estimating this
-x00=array(x00,dim=c(m,1))
-init.x00 = x00
-V00 <- makediag(V00init,nrow=m); #initial variance of initx;
-B = makediag(1,m) # Interaction matrix: If estimated it will change from identity, otherwise remain constant
-initY1 = 0
-for(i in 1:n) {
- # do a simple linear regression, estimating the predicted value
- y.obs = y[i,]
- y.obs[which(y.obs==0)] = NA
- initY1[i] = lm(y.obs~seq(1:numYrs))$fitted.values[1]
-}
-bestLL = -1.0e10 #this is only used if MonteCarloInit == TRUE
+U=Uinit; Q=Qinit; A=Ainit; R=Rinit; x00=x00init; V00=V00init; B=Binit
+
+#############Create record of each variable over all iterations
+###########################################################
+if(debugit)
+ iter.record <- list(U = array(U,dim=c(m,1)), Q = array(Q,dim=c(m,m,1)),
+ A = array(A,dim=c(n,1)), R = array(R,dim=c(n,n,1)),
+ x00 = array(x00,dim=c(m,1)), loglike = array(-10000, dim=c(1, 1)) )
+else
+ iter.record <- "Set debugit=TRUE to return iter.record"
+#############If we don't do the MC init part, then jump right into the EM algorithm
+###########################################################
if(MonteCarloInit == FALSE) {
# use values passed in by user
numInits = 0
numInitSteps = 0
}
-# EW: the first numInits iterations of this loop represent the MCinitialization phase (if MCInit=T),
+
+drawProgressBar = FALSE #If the time library is not installed, no prog bar
+if(require(time)==TRUE & MonteCarloInit == TRUE & !silent) { #then we can draw a progress bar
+ cat("\n"); cat("> Starting Monte Carlo Initializations\n")
+ prev = progressBar()
+ drawProgressBar = TRUE
+}
+# The first numInits iterations of this loop represent the MCinitialization phase (if MCInit=T),
# the last iteration is the final estimation (if MCInit=F, then goes straight to the last)
+bestLL = -1.0e10 #this is only used if MonteCarloInit == TRUE
for(loop in 1:(numInits + 1)) {
#reset for each loop
loglike.old <- -10000
@@ -166,36 +233,41 @@
U = array(runif(m,U.bounds[1],U.bounds[2]),dim=c(m,1))
Q = makediag(exp(runif(m,logQ.bounds[1],logQ.bounds[2])),nrow=m)
R = makediag(exp(runif(n,logR.bounds[1],logR.bounds[2])),nrow=n)
- init.x00 = array(runif(m,0.75*initY1,1.25*initY1),dim=c(m,1))
- x00 = init.x00
+ x00 = array(runif(m,0.75*x00init,1.25*x00init),dim=c(m,1))
+ if(drawProgressBar==TRUE) prev = progressBar(loop/numInits,prev)
}
# Some initial start draws of the params are so awful that loglike will go down. We want to discard these cases.
validDraw = TRUE
- # If we've exceeded the initialization phase, use the best params for starting final estimation
+ # If we've exceeded the initialization phase, use the best params at end of MCInit for starting final estimation
if(loop > numInits & MonteCarloInit == TRUE) {
Q = best.Q
R = best.R
U = best.U
+ A = best.A
+ B = best.B
x00 = best.x00
+ if(!silent) { cat("\n"); cat("> Starting EM Maximization Routine\n")}
}
for(iter in 1:numIter) {
- kf <- Kfilter(m,n,numYrs,U,Q,A,R,B,x00,V00,include,y) #call kalman filter to get x(t)|y(1:T) etc estimates
+ iter.model=list( U=U, Q=Q, R=R, A=A, B=B, x00=x00, V00=V00, include=include, kf.data=y )
+ kf <- Kfilter(iter.model) #call kalman filter/smoother to get x(t)|y(1:T) etc estimates
loglike.new = kf$loglik
-
- if(iter>1 && is.finite(loglike.old) == T && is.finite(loglike.new) == T) cvg = loglike.new - loglike.old ## cvg is improvement in likelihood
- # If loglike goes down, set validDraw to FALSE
- if(cvg<0) validDraw = FALSE
- else validDraw = TRUE
-
- # if converged, break out
- if(cvg >= 0 & cvg < tol) break
+
+ if(iter>1 && is.finite(loglike.old) == TRUE && is.finite(loglike.new) == TRUE ) cvg = loglike.new - loglike.old ## cvg is improvement in likelihood
+ #If loglike goes down, set validDraw to FALSE
+ if(cvg<0) validDraw = FALSE else validDraw = TRUE
+ #The routine can produce cvg<0 for iter = 2, because U(iter=2) will use Q(iter=1) which is Qinit. Qinit is not from a fit to the
+ #data, so U(iter=2) might not increase the LL; after iter=2 however, the LL should not go down
+ if(iter > 2 & cvg<0 & loop > numInits) print(paste("iter=",iter," LogLike DROPPED: This should not happen.",sep=""))
+ # if converged, break out
+ if(cvg >= 0 & cvg < tol) break
- # Store loglike for comparison to new one after parameters are updated
- loglike.old = loglike.new
+ # Store loglike for comparison to new one after parameters are updated
+ loglike.old = loglike.new
- ################# M STEP Re-estimate U,Q,A,R,B,X00,V00 via ML given x(t) estimate
+ ################# M STEP Re-estimate U,Q,A,R,B,X00 via ML given x(t) estimate
################
# Get new A subject to its constraints (update of R will use this)
# See notes by EH for derivation
@@ -221,20 +293,25 @@
As = sum1/numYrs
As = As - As[1]
A[this.index] = As
- } # end for subpop loop
+ } # end for subpop loop
+ A <- as.vector(c(ZA%*%(colSums(makediag(A,nrow=n)%*%ZA) * 1/colSums(ZA))) )
} # end if
################
- # Get new U subject to its constraints (update of Q will use this)
+ # Get new U subject to its constraints (update of Q and B will use this)
# See notes by EH and Rich Hinrichsen for derivation
################################################################
# if m=1 or m=n this is going to reduce to U <- (kf$xtT[,numYrs]-kf$xtT[,1])/(numYrs-1)
# if some state processes share a u, then we need to take the average across processes sharing a u, taking into account the variance of each process
+ X1B0 = 0
+ for (i in 2:numYrs) {
+ X1B0 = X1B0 + kf$xtT[,i] - B%*%kf$xtT[,i-1]
+ }
Qinv = chol2inv(chol(Q)) # this is calculated here because used twice below
Qinv = (Qinv+t(Qinv))/2 #enforce symmetry
- numer = t(ZU)%*%Qinv%*%(kf$xtT[,numYrs]-kf$xtT[,1])
- denom = 1/takediag((t(ZU)%*%Qinv%*%ZU)*(numYrs-1))
- U = c(ZU%*%(numer*denom))
-
+ numer = t(ZU)%*%Qinv%*%X1B0
+ denom = chol2inv(chol( t(ZU)%*%Qinv%*%ZU ) )
+ if(U.constraint!="fixed") U = ZU%*%(denom%*%numer) / (numYrs-1)
+
################
# Get new R subject to its constraints (updated 07.09.08 by EH to fix R est bug and by EW to add grouping)
# S&S 4.77 with addition of grouping and diagonal constraint variants
@@ -272,14 +349,15 @@
R <- Rdiag + Roffdiag
} #if R is equal var and equal cov
- ################
- # Get new Q subject to its constraints (updated 07.09.08 by by EW to add grouping)
- # This is a little different than S&S Eqn 4.76; theirs is based on the likelihood
- # as written in Eqn 4.69 & and Harvey 4.2.19
- # We don't have a prior on V00 and what you set that at affects the Q M-step calculation
+ # S10, S00, and S11 calculations
+ # This is a little different than S&S1 Eqn 4.76 (S&S2 Eqn 6.67-69); theirs is based on the likelihood
+ # as written in Eqn 4.69 (6.64) & and Harvey 4.2.19
+ # We don't have a prior on x0 and what you set V00 at affects the Q M-step calculation
# to its detriment.
# Instead I'm using the likelihood calculation from Ghahramani and Hinton which is does
- # the sum for the Q bit from 2 to T not 1 to T; see notes by EH
+ # the sum for the Q bit from 2 to T not 1 to T; So leaves out x00 and V00; see notes by EH
+ # This seems to work better although we should be able to rewrite this with V00=0 for the case where x00 is treated
+ # as fixed but unknown.
# S&S and Harvey would start here
# S00 <- kf$V0T + kf$x0T%*%t(kf$x0T)
# S11 <- kf$VtT[,,1] + (kf$xtT[,1]-U)%*%t(kf$xtT[,1]-U)
@@ -287,17 +365,32 @@
# I switch on 7/22/08 to this since it seems less sensitive to V00 and finds Q with max L with lower tol setting
# with S&S it climbs Q at more slowly and cvg hits tol before max is reached
################################################################
- S00 = 0
- S11 = 0
- S10 = 0
+ S00 = 0; S11 = 0; S10 = 0; X1 = 0; X0 = 0
for (i in 2:numYrs) {
- S00 <- S00 + (kf$VtT[,,i-1] + kf$xtT[,i-1]%*%t(kf$xtT[,i-1]));
- S10 <- S10 + (kf$Vtt1T[,,i]+(kf$xtT[,i]-U)%*%t(kf$xtT[,i-1]));
- S11 <- S11 + (kf$VtT[,,i] + (kf$xtT[,i]-U)%*%t(kf$xtT[,i]-U));
+ S00 = S00 + (kf$VtT[,,i-1] + kf$xtT[,i-1]%*%t(kf$xtT[,i-1])); #sum 2:T E(xt1T%*%t(xt1T))
+ S10 = S10 + (kf$Vtt1T[,,i] + kf$xtT[,i]%*%t(kf$xtT[,i-1])); #sum 2:T E(xtT%*%t(xt1T))
+ S11 = S11 + (kf$VtT[,,i] + kf$xtT[,i]%*%t(kf$xtT[,i])); #sum 2:T E(xtT%*%t(xt1T))
+ X0 = X0 + kf$xtT[,i-1] #sum 2:T E(xt1T)
+ X1 = X1 + kf$xtT[,i] #sum 2:T E(xtT)
}
- Q_unconstrained = (S11 - (S10 + t(S10)) + S00)/(numYrs-1); #numYrs - 1 since summing 2 to T
- # Calculate the interaction matrix, if requested
- if(estInteractions==TRUE) B = S10%*%chol2inv(chol(S00))
+
+ ################
+ # EW: Get new B subject to its constraints
+ ################################################################
+ if(B.constraint!="fixed" & B.constraint!="identity")
+ B_unconstrained = (S10-U%*%t(X0))%*%chol2inv(chol(S00))
+ if(B.constraint == "unconstrained") B <- B_unconstrained
+ if(B.constraint == "diagonal") { #allows shared B's along diagonal
+ B=makediag(takediag(S10-U%*%t(X0))/takediag(S00))
+ B <- makediag(c(ZB%*%(colSums(B%*%ZB) * 1/colSums(ZB))),nrow=m)
+ }
+ ################
+ # Get new Q subject to its constraints
+ ################################################################
+ Q_unconstrained = (S11 - B%*%t(S10) - S10%*%t(B) + B%*%S00%*%t(B)
+ -U%*%t(X1) - X1%*%t(U) + U%*%t(B%*%X0) + B%*%X0%*%t(U))/(numYrs-1) + U%*%t(U);
+ # don't use (S11 - B %*% t(S10))/(numYrs-1) like in S&S because that is just for certain cases of the process eqn.
+
#The inv functions don't work well when the diagonal elements get lower than the machine tol, so put a min on those
if(length(Q_unconstrained)==1)
Q_unconstrained <- max(sqrt(.Machine$double.eps),Q_unconstrained)
@@ -314,42 +407,67 @@
Qoffdiag <- Qoffdiag*(matrix(1,m,m)-makediag(1,m))
Q <- Qdiag + Qoffdiag;
} #if equal variances and covariances
-
+ if(varcov.Q == "equalvarcov.groups") {
+ Qdiag = takediag(Q_unconstrained)
+ Qoffdiag <- array(0,dim=c(m,m))
+ for(qgrp in 1:Q.numGroups) {
+ whichInGroup = which(Q.groups==qgrp)
+ Qdiag[whichInGroup] = mean(takediag(Q_unconstrained)[whichInGroup]) #average
+ numInGroup=length(whichInGroup)
+ if(numInGroup > 1)
+ Qoffdiag[whichInGroup,whichInGroup] = (1/(numInGroup*(numInGroup-1)))*
+ (sum(Q_unconstrained[whichInGroup,whichInGroup])-sum(takediag(Q_unconstrained[whichInGroup,whichInGroup])))
+ }
+ if(m>1) diag(Qoffdiag)=0 #the if stops R from creating an Qoffdiag x Qoffiag matrix is Qoffdiag happens to be an interger
+ Q <- makediag(Qdiag) + Qoffdiag;
+ } #if equal variances and covariances in groups
+
################
- # EH: Get new x00, V00 cannot be estimated so it is fixed
+ # EH: If we treat x00 as fixed and unknown, we estimate it.
+ # Get new x00, V00 cannot be estimated so it is fixed
# S&S Eqn 4.78
+ # If x00 is a known prior, we don't estimate.
################################################################
- x00 <- c(kf$x0T)
+ if(x00prior==FALSE) {
+ x00 = c(kf$x0T)
+ x00 = as.vector(c(Zx00%*%(colSums(makediag(x00,nrow=m)%*%Zx00) * 1/colSums(Zx00))) ) #impose grouping
+ x00 = array(x00, dim=c(m,1))
+ }
################
# Keep a record of the iterations; this is to allow user to debug if they are having problems with convergence
- ################################################################
- iter.record$R[,,iter] <- R
- iter.record$U[,iter] <- U
- iter.record$Q[,,iter] <- Q
- iter.record$A[,iter] <- A
- iter.record$loglike[iter] <- loglike.new
+ # Written this way since we don't know ahead of time how long the record will be
+ # that depends on how quickly it converges
+ ################################################################
+ if(debugit & loop > numInits){
+ iter.record$R = array(c(iter.record$R,R),dim=c(n,n,iter+1))
+ iter.record$U <- array(c(iter.record$U,U),dim=c(m,iter+1))
+ iter.record$x00 <- array(c(iter.record$x00,x00),dim=c(m,iter+1))
+ iter.record$Q <- array(c(iter.record$Q,Q),dim=c(m,m,iter+1))
+ iter.record$A <- array(c(iter.record$A,A),dim=c(n,iter+1))
+ iter.record$loglike <- c(iter.record$loglike,loglike.new)
+ }
} # end inner iter loop
# If doing a MCInit, check whether the likelihood is the best observed
# Only use bootstrap param draws where loglike did not go down during numInitSteps
if(validDraw == TRUE & MonteCarloInit == TRUE & loglike.new > bestLL) {
# update the best initial parameter estimates
- best.Q = iter.record$Q[,,1]
- best.R = iter.record$R[,,1]
- best.U = iter.record$U[,1]
- best.x00 = init.x00
+ best.Q = Q
+ best.R = R
+ best.U = U
+ best.A = A
+ best.B = B
+ best.x00 = x00
bestLL = loglike.new
}
} # end iter loop that is running until the EM algorithm converges
-# Consolidate arrays for output
-iter.record$R <- iter.record$R[,,1:(iter-1)]
-iter.record$U <- iter.record$U[,1:(iter-1)]
-iter.record$Q <- iter.record$Q[,,1:(iter-1)]
-iter.record$A <- iter.record$A[,1:(iter-1)]
-iter.record$loglike <- iter.record$loglike[1:(iter-1)]
+# Fix the initial loglike from -10000
+if(debugit){
+ iter.record$loglike[1] <- iter.record$loglike[2]
+}
if(!silent){
cat(paste("Finished in ",iter," interations. Max.iter was ",max.iter,".\n",sep=""))
}
@@ -359,30 +477,74 @@
se = matrix(0,nrow=m,ncol=numYrs)
for(i in 1:numYrs) se[,i] = t(sqrt(takediag(kf$VtT[,,i])))
}
+# This calculates the correct likelihood when x00 is estimated, that is treated as fixed but unknown
+# Since it is fixed, V00=0 but we can't run the EM algorithm with V00=0 or it would not converge
+if(x00prior==FALSE) {
+ V00=0*V00 #if x00prior==FALSE, then x00 is treated as fixed but unknown and V00=0
+ iter.model=list( U=U, Q=Q, R=R, A=A, B=B, x00=x00, V00=V00, include=include, kf.data=y )
+ kf = Kfilter(iter.model);
+ loglike=kf$loglik
+ }
+else loglike=loglike.new
+
+# AIC calculation
+B.params = ifelse(!(B.constraint %in% c("fixed","identity")), m * m, 0)
+if(x00prior==FALSE) x00.params = length(unique(as.vector(x00)) ) else x00.params=0
+Q.params = unique(as.vector(Q))
+Q.params = length(Q.params[Q.params != 0])
+R.params = unique(as.vector(R))
+R.params = length(R.params[R.params != 0])
+U.params = length(unique(as.vector(U)))
+A.params = unique(as.vector(A))
+A.params = length(A.params[A.params != 0])
+K = x00.params + B.params + Q.params + R.params + U.params + A.params
+AICval = -2*loglike + 2*K
+AICc = ifelse(sum(include)>(K+1),-2*loglike + 2*K*(sum(include)/(sum(include)-K-1)),"NA, data points less than K+1")
+
+# make sure R change the dimensions of any of the matrices- 01/20/09
+if(is.matrix(B)==FALSE) B = as.matrix(B)
+if(is.matrix(Q)==FALSE) Q = as.matrix(Q)
+if(is.matrix(R)==FALSE) R = as.matrix(R)
+if(is.matrix(V00)==FALSE) V00 = as.matrix(V00)
+U=array(U,dim=c(m,1))
+A=array(A,dim=c(n,1))
+x00=array(x00,dim=c(m,1))
-names(U) = U.names
-
-if(is.matrix(Q)==F) Q = as.matrix(Q) # this line added to make sure that a scalar isn't passed to rmvnorm - 01/20/09
-
-rownames(Q) = Q.names
-colnames(Q) = Q.names
-rownames(R) = R.names
-colnames(R) = R.names
-states = kf$xtT
-return(list(states=states,CI_states.lower = (kf$xtT - 1.96*se), CI_states.upper = (kf$xtT + 1.96*se), A=A,B=B,Q=Q,R=R,U=U,Z=Z,x00=x00,V00=V00,V0T=kf$V0T,Kt=kf$Kt,Innov=kf$Innov,Sigma=kf$Sigma,m=m, n=n, numYrs=numYrs,iter=iter,loglike=loglike.new,iter.record=iter.record, whichPop=whichPop, R.groups=R.groups, Q.groups=Q.groups, U.groups=U.groups, varcov.R=varcov.R, varcov.Q=varcov.Q, data=t(y), include=include))
+# fix the names to the user specified names
+state.names = levels(as.factor(whichPop))
+rownames(U) = paste(state.names,U.names,sep="")
+rownames(A) = paste(site.names,whichPop.names, sep="")
+rownames(x00) = paste(state.names,x00.names,sep="")
+rownames(V00) = state.names
+colnames(V00) = state.names
+rownames(Q) = paste(state.names,Q.names,sep="")
+colnames(Q) = paste(state.names,Q.names,sep="")
+rownames(R) = paste(site.names,R.names,whichPop.names,sep="")
+colnames(R) = paste(site.names,R.names,whichPop.names,sep="")
+rownames(B) = state.names
+colnames(B) = state.names
+rownames(kf$xtT) = state.names
+
+return(list(states=kf$xtT,A=A,B=B,Q=Q,R=R,U=U,Z=Z,x00=x00,V00=V00,V0T=kf$V0T,Kt=kf$Kt,Innov=kf$Innov,Sigma=kf$Sigma,m=m, n=n,
+numYrs=numYrs,iter=iter,loglike=loglike, AIC=AICval, AICc=AICc, iter.record=iter.record,
+whichPop=whichPop, R.groups=R.groups, Q.groups=Q.groups, U.groups=U.groups, varcov.R=varcov.R, varcov.Q=varcov.Q,
+estInteractions=estInteractions, x00prior=x00prior, data=orig.data, kf.data=y, include=include))
} #End of the KalmanEM function
-
-Kfilter = function(m,n,numYrs,U,Q,A,R,Phi,x00,V00,include,y) {
- # EH: This is running a Kalman-Raush filter (forward + backward) to get (written univariate to make it look cleaner):
- # xtT: E(x(t) | y(1:T)) , VtT: Var(x(t)*x(t) | y(1:T)), Vtt1T: Var(x(t)*x(t-1) | y(1:T))
- # EJW: This code is a hybrid from Shumway and Stoffer (2006) and Matlab
- # code originally written by Eli Holmes for multi-state state-space
- # estimation. The code originally used solve() for matrix inverses -
- # EW changed this to cholesky decomp though for speed.
+#######################################################################################################
+# Kfilter
+#######################################################################################################
+Kfilter = function(mssm.model) {
+ # EH: This returns the output from the Kalman filter and smoother
# ** All eqn refs are to 2nd ed of Shumway & Stoffer (2006): Time Series Analysis and Its Applications
-
+
+ #attach would be risky here since user might have one of these variables in their workspace
+ U=mssm.model$U; Q=mssm.model$Q; R=mssm.model$R; A=mssm.model$A; Phi=mssm.model$B; x00=mssm.model$x00; V00=mssm.model$V00
+ include=mssm.model$include; #this is the design matrix, called A in S&S, with 0s at missing values
+ y=mssm.model$kf.data #kf.data has missing values replaced by 0 and is n x T (not T x n)
+ n=dim(y)[1]; numYrs=dim(y)[2]; m=dim(as.matrix(Q))[1]
+
#initialize - these are for the forward, Kalman, filter
# for notation purposes, 't' represents current point in time, 'T' represents the length of the series
Vtt <- array(0,dim=c(m,m,numYrs)) # Analagous to S&S Ptt, E[Vtt|y(1:t)]
@@ -400,7 +562,7 @@
Kt <- array(0, dim=c(m,n,numYrs)) # 3D matrix of Kalman gain, EW added 11/14/08
#forward pass gets you E[x(t) given y(1:t)]
- xtt1[,1] <- Phi%*%c(x00) + U # eqn 6.19
+ xtt1[,1] <- Phi%*%x00 + U # eqn 6.19
Vtt1[,,1] <- Phi%*%V00%*%t(Phi) + Q # eqn 6.20
include.Y <- include[,,1] # used for missing data
dim(include.Y) <- c(n,m) #stops R from changes the dimensions when m=1
@@ -414,9 +576,14 @@
xtt[,1] <- xtt1[,1] + Kt[,,1]%*%innov[,1]; # eqn 6.21
Vtt[,,1] <- Vtt1[,,1]-Kt[,,1]%*%include.Y%*%Vtt1[,,1]; # eqn 6.22, detail after 6.28
Vtt[,,1] <- (Vtt[,,1]+t(Vtt[,,1]))/2; #Vtt must be symmetric
- # the missing values will contribute 0.0 for this calc
+ # The following are needed for the likelihood calculation
+ # the missing values will contribute 0.0 for the LL calc
+ # R_mod is needed for the corrected likelihood calculation when there are missing values
+ # See section 12.3 in Time Series: Theory and Methods (1991) Peter J. Brockwell, Richard A. Davis
+ include.Y3 <- diag(n)-makediag(include.Y2)
+ R_mod = include.Y3 + t(makediag(include.Y2)) %*% R %*% makediag(include.Y2)
vt[,1] <- y[,1]-(include.Y%*%xtt1[,1]+include.Y2*A) #3.3.4; need to hold on to this for loglik calc
- Ft[,,1] <- include.Y%*%Vtt1[,,1]%*%t(include.Y)+R #need to hold on to this for loglik calc
+ Ft[,,1] <- include.Y%*%Vtt1[,,1]%*%t(include.Y)+R_mod #need to hold on to this for loglik calc
Ft[,,1] <- (Ft[,,1]+t(Ft[,,1]))/2; #to ensure its symetric
for (t in 2:numYrs) {
@@ -431,9 +598,11 @@
xtt[,t] <- xtt1[,t] + Kt[,,t]%*%innov[,t] # eqn 6.21
Vtt[,,t] <- Vtt1[,,t]-Kt[,,t]%*%include.Y%*%Vtt1[,,t] # eqn 6.22, detail after 6.28
Vtt[,,t] <- (Vtt[,,t]+t(Vtt[,,t]))/2 #to ensure its symetric
- # missing values will contribute nothing
+ # Variables needed for the likelihood calculation; see comments above
+ include.Y3 <- diag(n)-makediag(include.Y2)
+ R_mod = include.Y3 + t(makediag(include.Y2)) %*% R %*% makediag(include.Y2)
vt[,t] <- y[,t]-(include.Y%*%xtt1[,t]+include.Y2*A) #need to hold on to this for loglik calc
- Ft[,,t] <- include.Y%*%Vtt1[,,t]%*%t(include.Y)+R #need to hold on to this for loglik calc
+ Ft[,,t] <- include.Y%*%Vtt1[,,t]%*%t(include.Y)+R_mod #need to hold on to this for loglik calc
Ft[,,t] <- (Ft[,,t]+t(Ft[,,t]))/2 #to ensure its symetric
}
KT <- Kt[,,t];
@@ -462,16 +631,16 @@
#run another backward recursion to get E[x(t)x(t-1)|y(T)]
include.Y <- include[,,numYrs]
dim(include.Y) <- c(n,m) #stops R from changes the dimensions when m=1
- Vtt1T[,,numYrs] <- (makediag(1,m) - KT%*%include.Y)%*%Vtt[,,numYrs-1] #this is Var(x(T)x(T-1)|y(T))
+ Vtt1T[,,numYrs] <- (makediag(1,m) - KT%*%include.Y)%*%Phi%*%Vtt[,,numYrs-1] #eqn. 6.55 this is Var(x(T)x(T-1)|y(T))
s <- seq(numYrs,3)
for (i in 1:(numYrs-2)) {
yr <- s[i]
- Vtt1T[,,yr-1] <- Vtt[,,yr-1]%*%t(J[,,yr-2]) + J[,,yr-1]%*%(Vtt1T[,,yr]-Vtt[,,yr-1])%*%t(J[,,yr-2])
+ Vtt1T[,,yr-1] <- Vtt[,,yr-1]%*%t(J[,,yr-2]) + J[,,yr-1]%*%(Vtt1T[,,yr]-Phi%*%Vtt[,,yr-1])%*%t(J[,,yr-2]) #eqn 6.56
}
- Vtt1T[,,1] <- Vtt[,,1]%*%t(J0) + J[,,1]%*%(Vtt1T[,,2]-Vtt[,,1])%*%t(J0)
+ Vtt1T[,,1] <- Vtt[,,1]%*%t(J0) + J[,,1]%*%(Vtt1T[,,2]-Phi%*%Vtt[,,1])%*%t(J0)
#Calculate log likelihood, see eqn 6.62
- loglik <- -n*(numYrs/2)*log(2*pi)
+ loglik <- -sum(include)/2*log(2*pi)
for (i in 1:numYrs) {
if(length(Ft[,,i])==1) detFt <- Ft[,,i] else detFt <- det(Ft[,,i])
Ftinv <- chol2inv(chol(Ft[,,i]))
@@ -481,76 +650,187 @@
return(list(xtT = xtT, VtT = VtT, Vtt1T = Vtt1T, x0T = x0T, V0T = V0T, loglik = loglik, Vtt = Vtt, Vtt1 = Vtt1, J=J, Kt=Kt, xtt1 = xtt1, Innov=innov, Sigma=Ft))
}
-
-FailedErrorCheck = function(y, whichPop, U.groups, R.groups, Q.groups, varcov.Q, varcov.R,Uinit,Qinit,Ainit,Rinit,x00) {
+###############################################################################################################################################
+# FailedErrorCheck
+###############################################################################################################################################
+FailedErrorCheck = function(y, whichPop, U.groups, x00.groups, A.groups, R.groups, Q.groups, B.groups, varcov.Q, varcov.R, B.constraint, Uinit, Qinit, Ainit, Rinit, Binit, x00, V00init,estInteractions) {
############# Error check to make sure user isn't trying to set up an illegal or inconsistent model structure
#################### Check whichPop
n = dim(y)[1] #each row is obs ts
if(length(whichPop) != n) {
- cat("Error: whichPop is not in the correct form; it should be 1xn or nx1; \n"); return(TRUE) }
+ cat("Error: whichPop is not in the correct form; it should be 1xn or nx1; \n")
+ cat("n is the number of time series (i.e. columns) in your dataset.\n")
+ return(TRUE) }
m = max(whichPop) # this is the number of state processes
if( FALSE %in% ( (1:m) %in% whichPop ) ) {
- cat("Error: Something is wrong with whichPop. You need an observation time series for each state process. \n"); return(TRUE) }
+ cat("Error: Something is wrong with whichPop.\n")
+ cat(paste("max(whichPop) is ", m, " and each of 1:m should appear in whichPop.\n",sep=""))
+ cat("But some of the 1:m are missing from your whichPop.\n")
+ return(TRUE) }
#################### Check varcov.Q
-if( (varcov.Q %in% list("unconstrained", "diagonal", "equalvarcov"))==FALSE ) {
- cat("Error: options for varcov.Q are scalar, unconstrained, diagonal or equalvarcov (passed as text in quotes) \n" ); return(TRUE) }
+if( (varcov.Q %in% list("unconstrained", "diagonal", "equalvarcov", "equalvarcov.groups"))==FALSE ) {
+ cat("Error: options for varcov.Q are unconstrained, diagonal or equalvarcov (passed as text in quotes) \n" ); return(TRUE) }
+if( varcov.Q == "equalvarcov" & !( !(FALSE %in% (Q.groups == rep(1,m))) | (length(Q.groups==1) & is.na(Q.groups[1])) ) ) {
+ cat("Error: Mismatch between varcov.Q and Q.groups. varcov.Q=equalvarcov but Q.groups != rep(1,m) or NA. \n"); return(TRUE) }
#################### Check varcov.R
if( (varcov.R %in% list("unconstrained", "diagonal", "equalvarcov"))==FALSE ) {
cat("Error: options for varcov.R are unconstrained, diagonal or equalvarcov (passed as text in quotes) \n" ); return(TRUE) }
if( (-99 %in% y) & varcov.R != "diagonal" ) {
cat("Sorry! The current code cannot estimate observation error covariances when there are missing data; set varcov.R to diagonal \n"); return(TRUE) }
+if( varcov.R == "equalvarcov" & !( !(FALSE %in% (R.groups == rep(1,n))) | (length(R.groups==1) & is.na(R.groups[1])) ) ) {
+ cat("Error: Mismatch between varcov.R and R.groups. varcov.R=equalvarcov but R.groups != rep(1,n) or NA. \n"); return(TRUE) }
+
+################### B.constraint checked below after Binit is checked
#################### Check Q.groups
if(length(Q.groups) != m) {
- cat("Error: There is a mismatch between the number of state processes and length of Q.groups \n"); return(TRUE) }
+ cat("Error: There is a mismatch between the number of state processes and length of Q.groups \n")
+ cat("max(whichPop) must equal length(Q.groups) so that each state is assigned a Q group. \n")
+ return(TRUE)
+ }
not.all.m.in.groups = FALSE %in% ( (1:m) %in% Q.groups )
-if( not.all.m.in.groups & varcov.Q != "diagonal" ) {
- cat("Sorry! The code cannot apply groupings of Qs except to diagonal Q matrices \n"); return(TRUE) }
+if( not.all.m.in.groups & varcov.Q == "unconstrained" ) {
+ cat("Sorry! The code cannot apply groupings of Qs except to diagonal or equalvarcov Q matrices \n"); return(TRUE) }
# Check that each state process is assigned to a Q group
if(FALSE %in% ((1:max(Q.groups)) %in% Q.groups) ) {
cat(paste("Error: something is wrong with Q.groups You have ",max(Q.groups)," Q groups but not all appear in Q.groups \n",sep="")); return(TRUE) }
-#################### Check U.groups
-if(length(U.groups) != m) {
- cat(paste("Error: U.groups should be m x 1 or 1 x m","Model is m = ",m," as currently set up.\n",sep="")); return(TRUE) }
+#################### Check U.groups length
+U.groups=as.array(U.groups) #set dim attr
+if(!(identical(floor(dim(U.groups)),floor(c(m,1))) | identical(floor(dim(U.groups)),floor(c(1,m))) | identical(floor(dim(U.groups)),floor(m)) )) {
+ cat(paste("Error: U.groups should be length m. ","Model is m = ",m," as currently set up.\n",sep="")); return(TRUE) }
if(FALSE %in% ((1:max(U.groups)) %in% U.groups) ) {
cat(paste("Error: something is wrong with U.groups. You have ",max(U.groups)," U groups but not all appear in U.groups. \n",sep="")); return(TRUE) }
-#################### Check R.groups
-if(length(R.groups) != n) {
- cat("Error: There is a mismatch between the number of measurement time series and length of R.groups. \n"); return(TRUE) }
-not.all.n.in.groups = FALSE %in% ( (1:n) %in% R.groups )
-if( not.all.n.in.groups & varcov.R != "diagonal" ) {
+#################### Check R.groups length
+R.groups=as.array(R.groups) #set dim attr
+if(!(identical(floor(dim(R.groups)),floor(c(n,1))) | identical(floor(dim(R.groups)),floor(c(1,n))) | identical(floor(dim(R.groups)),floor(n)) )) {
+ cat("Error: There is a mismatch between the number of measurement time series and length of R.groups. \n")
+ cat("The number of columns of data must equal length(R.groups) so that each time series (column) is assigned an R group. \n")
+ return(TRUE) }
+not.all.n.in.groups = !(setequal(1:n, R.groups)) #check that R.groups is some combo of 1:n (previous check made sure that length(R.groups)==n
+if( not.all.n.in.groups & varcov.R == "unconstrained" ) {
cat("Sorry! The code cannot apply R.groups except to diagonal R matrices; set varcov.R to diagonal. \n"); return(TRUE) }
-if(FALSE %in% ((1:max(R.groups) ) %in% R.groups) ) {
- cat(paste("Error: something is wrong with R.groups. You have ",max(R.groups)," R groups but not all appear in R.groups.\n",sep="")); return(TRUE) }
-
+if(!setequal(1:max(R.groups), R.groups)) {
+ cat(paste("Error: something is wrong with R.groups. You have 1:",max(R.groups)," R groups but not all appear in R.groups.\n",sep="")); return(TRUE) }
+
+#################### Check B.groups
+if(length(B.groups) != m) {
+ cat("Error: There is a mismatch between the number of state processes and length of B.groups \n")
+ cat("max(whichPop) must equal length(B.groups) so that each state is assigned a B group. \n")
+ return(TRUE)
+ }
+not.all.m.in.groups = FALSE %in% ( (1:m) %in% B.groups )
+if( not.all.m.in.groups & B.constraint == "unconstrained" ) {
+ cat("Sorry! The code cannot apply groupings to B estimation except for diagonal B matrices \n"); return(TRUE) }
+#if B.constaint == "fixed" or "identity" then B.groups can be passed in. It is used just for names on the B matrix.
+# Check that each state process is assigned to a B group
+if(FALSE %in% ((1:max(B.groups)) %in% B.groups) ) {
+ cat(paste("Error: something is wrong with B.groups You have ",max(B.groups)," B groups but not all appear in B.groups \n",sep="")); return(TRUE) }
+
+#################### Check A.groups
+A.groups=as.array(A.groups) #set dim attr
+if(!(identical(floor(dim(A.groups)),floor(c(n,1))) | identical(floor(dim(A.groups)),floor(c(1,n))) | identical(floor(dim(A.groups)),floor(n)) )) {
+ cat("Error: There is a mismatch between the number of measurement time series and length of A.groups. \n")
+ cat("The number of columns of data must equal length(A.groups) so that each time series (column) is assigned an A group. \n")
+ return(TRUE)
+ }
+
+#################### Check x00.groups
+x00.groups=as.array(x00.groups) #set dim attr
+if(!(identical(floor(dim(x00.groups)),floor(c(m,1))) | identical(floor(dim(x00.groups)),floor(c(1,m))) | identical(floor(dim(x00.groups)),floor(m)) )) {
+ cat(paste("Error: x00.groups should be length m","Model is m = ",m," as currently set up.\n",sep="")); return(TRUE) }
+
############# Check that the initial values are correctly sized
###########################################################
-if(length(Uinit) != m) {
- cat("Error: Uinit should be m x 1 or 1 x m; Either your Uinit is wrong or your flags are setting m to something you are not expecting.\n"); return(TRUE) }
-if(length(Qinit) != m) {
- cat("Error: Qinit should be m x 1 or 1 x m; Either your Qinit is wrong or your flags are setting m to something you are not expecting.\n"); return(TRUE) }
-if(length(Ainit) != n) {
- cat("Error:L Ainit should be n x 1 or 1 x n; Your Ainit is the wrong size.\n"); return(TRUE) }
-if(length(Rinit) != n) {
- cat("Error: Rinit should be n x 1 or 1 x n; Your Rinit is the wrong size.\n"); return(TRUE) }
-if(length(x00) != m) {
- cat("Error: x00 should be m x 1 or 1 x m; Either your x00 is wrong or your flags are setting m to something you are not expecting.\n"); return(TRUE) }
+Uinit=as.array(Uinit) #set dim attr
+if(!(identical(floor(dim(Uinit)),floor(c(m,1))) | identical(floor(dim(Uinit)),floor(c(1,m))) | identical(floor(dim(Uinit)),floor(m)) | identical(floor(dim(Uinit)),floor(1)))) {
+ cat("Error: Uinit should be scalar, m x 1 or 1 x m; Either your Uinit is wrong or your flags are setting m to something you are not expecting.\n"); return(TRUE) }
+Qinit=as.array(Qinit) #make sure dim attr is set
+if(!(identical(floor(dim(Qinit)),floor(c(m,1))) | identical(floor(dim(Qinit)),floor(c(1,m)))
+ | identical(floor(dim(Qinit)),floor(m)) | identical(floor(dim(Qinit)),floor(1)) | identical(floor(dim(Qinit)),floor(c(m,m)))) ) {
+ cat("Error: Qinit should be scalar, 1 x m, m x 1 or m x m; Either your Qinit is wrong or your flags are setting m to something you are not expecting.\n"); return(TRUE) }
+Ainit=as.array(Ainit) #set dim attr
+if(!(identical(floor(dim(Ainit)),floor(c(n,1))) | identical(floor(dim(Ainit)),floor(c(1,n))) | identical(floor(dim(Ainit)),floor(n)) | identical(floor(dim(Ainit)),floor(1)))) {
+ cat("Error:L Ainit should be scalar, n x 1 or 1 x n; Your Ainit is the wrong size.\n"); return(TRUE) }
+Rinit=as.array(Rinit) #make sure dim attr is set
+if(!(identical(floor(dim(Rinit)),floor(c(n,1))) | identical(floor(dim(Rinit)),floor(c(1,n)))
+ | identical(floor(dim(Rinit)),floor(n)) | identical(floor(dim(Rinit)),floor(1)) | identical(floor(dim(Rinit)),floor(c(n,n)))) ) {
+ cat("Error: Rinit should be scalar, 1 x n, n x 1 or n x n; Either your Rinit is wrong or your flags are setting n to something you are not expecting.\n"); return(TRUE) }
+Binit=as.array(Binit) #make sure dim attr is set
+if(!((length(Binit)==1 & is.na(Binit[1])) | identical(floor(dim(Binit)),floor(c(m,m)))) ) {
+ cat("Error: Binit should be m x m or NA (if not being fixed); Either your Binit is wrong or your flags are setting m to something you are not expecting.\n"); return(TRUE) }
+if(!(length(Binit)==1 & is.na(Binit[1])) & NA %in% Binit) {
+ cat("Error: Binit cannot contain NAs. \n"); return(TRUE) }
+x00=as.array(x00) #make sure dim attr is set
+if(!(identical(floor(dim(x00)),floor(c(m,1))) | identical(floor(dim(x00)),floor(c(1,m))) | identical(floor(dim(x00)),floor(m)) | identical(floor(dim(x00)),floor(1)))) {
+ cat("Error: x00 should be a scalar or a m x 1 or 1 x m vector; Either your x00 is wrong or your flags are setting m to something you are not expecting.\n"); return(TRUE) }
+V00init=as.array(V00init) #floor is necessary to get R to really make dim integer (grrr)
+if(!(identical(floor(dim(V00init)),floor(1)) | identical(floor(dim(V00init)),floor(m)) | identical(floor(dim(V00init)),floor(c(1,1))) | identical(floor(dim(V00init)),floor(c(m,1)))
+ | identical(floor(dim(V00init)),floor(c(1,m))) | identical(floor(dim(V00init)),floor(c(m,m)))) ) {
+ cat("Error: V00init should be length 1, m or m x m; Either your V00init is wrong or your flags are setting m to something you are not expecting.\n"); return(TRUE) }
+
+#################### Check B.constraint (depends on Binit passing its checks, i.e. is either NA or m x m)
+if(estInteractions==TRUE) B.constraint="identity" #backwards compatibility
+if( (B.constraint %in% list("fixed", "identity", "diagonal", "unconstrained"))==FALSE ) {
+ cat("Error: options for B.constraint are fixed, identity, unconstrained, or diagonal (passed as text in quotes) \n" ); return(TRUE) }
+if( B.constraint == "fixed" & (length(Binit)==1 & is.na(Binit[1])) ) {
+ cat("Error: If B.constraint==fixed then Binit must be set to what B should be held fixed at. \n"); return(TRUE) }
+B.is.not.na=FALSE
+if(length(Binit)==(m*m) & length(Binit)!=1) B.is.not.identity = (FALSE %in% (Binit==makediag(1,m)))
+
+if(length(Binit)==1 & !is.na(Binit[1])) B.is.not.na = TRUE
+if(length(Binit)==1 & is.na(Binit[1])) {B.is.not.na = FALSE; B.is.not.identity=TRUE}
+if( B.constraint == "identity" & B.is.not.identity & B.is.not.na ) {
+ cat("Error: If B.constraint==identity then Binit must be set to NA or a m x m identity matrix. \n"); return(TRUE) }
return(FALSE)
}
+###############################################################################################################################################
+# FailedArgumentCheck
+###############################################################################################################################################
+FailedArgumentCheck = function(y, whichPop, Uinit, Qinit, Ainit, Rinit, x00, V00init, max.iter, varcov.Q, varcov.R, U.groups, Q.groups, R.groups, U.bounds, logQ.bounds, logR.bounds, x00prior, estInteractions, MonteCarloInit, numInits, numInitSteps, tol, silent, debugit)
+#This checks that the user didn't specify illegal values for the arguments
+{
+if(!is.numeric(Ainit) | any(is.na(Ainit)) ) {
+ cat("Error: Ainit must be numeric and no NAs allowed.\n"); return(TRUE) }
+if(!is.numeric(Uinit) | any(is.na(Uinit)) ) {
+ cat("Error: Uinit must be numeric and no NAs allowed.\n"); return(TRUE) }
+if(!is.numeric(Rinit) | !(all(diag(as.matrix(Rinit))>0)) | any(is.na(Rinit)) ) {
+ cat("Error: Rinit must be numeric, >0 on the diagonal and no NAs allowed.\n"); return(TRUE) }
+if(!is.numeric(Qinit) | !(all(diag(as.matrix(Qinit))>0)) | any(is.na(Qinit)) ) {
+ cat("Error: Qinit must be numeric, >0 on the diagonal and no NAs allowed.\n"); return(TRUE) }
+if(!is.numeric(V00init) | !(all(diag(as.matrix(V00init))>0)) | any(is.na(V00init)) ) {
+ cat("Error: V00init must be numeric and positive on the diagonal.\n"); return(TRUE) }
+if( !(x00prior %in% c(TRUE, FALSE) ) ) {
+ cat("Error: x00prior must be TRUE or FALSE. It tells whether to treat x00 as known distribution (prior) or fixed but unknown.\n"); return(TRUE) }
+
+if( !(MonteCarloInit %in% c(TRUE, FALSE) ) ) {
+ cat("Error: MonteCarloInit must be TRUE or FALSE. It tells whether to do a random initial conditions start.\n"); return(TRUE) }
+if( MonteCarloInit==TRUE & numInitSteps<1 ){
+ cat("Error: When doing an MCInit (MonteCarloInit=TRUE), numInitSteps must be >= 1.\n"); return(TRUE) }
+if( MonteCarloInit==TRUE & numInits<1 ){
+ cat("Error: When doing an MCInit (MonteCarloInit=TRUE), numInits must be >= 1.\n"); return(TRUE) }
+
+return(FALSE)
+}
+
+###############################################################################################################################################
+# diag helper functions
+###############################################################################################################################################
takediag = function(x)
+############# Function to take the diagonal; deals with R trying to think too much with diag()
{
if(length(x)==1) return(x)
else return(diag(x))
}
makediag = function(x,nrow=NA)
+############# Function to make a diagonal matrix; deals with R trying to think too much with diag()
{
if(length(x)==1)
{
@@ -565,12 +845,19 @@
#######################################################################################################
-# These functions are all for bootstrapping
+# bootstrapMSSM
#######################################################################################################
-bootstrapMSSM = function(nBoot, model, maxIter = 5000, tol=0.01, onlyAIC = TRUE, silent=FALSE) {
- # This function updated by EW and EH
+bootstrapMSSM = function(nBoot, model, maxIter = 5000, tol=0.01, onlyAIC = NA, output="MLests", parametric=TRUE, silent=FALSE) {
# all equation numbers refer to Chapter 6, Shumway & Stoffer
- # onlyAIC is a debug feature to allows all the boot params and boot data iterates to be returned
+ # nBoot is the number of bootstrap replicates to do
+ # model is a specified model (needs the structure and parameter elements of the list)
+ # onlyAIC=FALSE : return all the parameter estimates from the bootstrap iterates; this is needed for bootstrap CIs
+ # output tells what output to produce; this can be a vector
+ # bootdata : returns the simulated (or bootstrapped) data.
+ # MLests : return the ML parameter estimates from the bootstrapped data
+ # AICb, Param.AICb, Nonparam.AICb: AICb does Nonparam.AICb by default but switches to Param.AICb if there are missing values
+ # AICi returns AICi
+ # parametric = TRUE forces the new data generated for output="bootdata" and/or "MLests" to use parametric bootstrapping
# silent is a flag to indicate whether the progress bar should be printed
###### Check that package mvtnorm is installed and load it if it isn't already loaded
@@ -581,94 +868,207 @@
cat(paste(str1,str2,str3,sep=""))
stop("the mvtnorm package is not installed")
}
-
- errors = t(stdInnov(model$Sigma, model$Innov)) # standardize innovations
+ ###### Error-checking on the arguments
+ if(onlyAIC %in% c(TRUE,FALSE)) stop("onlyAIC doesn't do anything anymore; use output=c() to describe what output you want")
+ if(FALSE %in% (output %in% c("bootdata","MLests","AICb","Hessian.AICb","Param.AICb","Nonparam.AICb","AICi"))) stop("incorrect bootstrapMSSM arg: output must be either bootdata, MLests, AICb, Param.AICb, Nonparam.AICb or AICi (in quotes)")
+ if(!(parametric %in% c(TRUE,FALSE))) stop("incorrect bootstrapMSSM arg: parametric must be either TRUE or FALSE")
+ if(!(silent %in% c(TRUE,FALSE))) stop("incorrect bootstrapMSSM arg: silent must be either TRUE or FALSE")
+ if(!is.numeric(tol)) stop("incorrect bootstrapMSSM arg: tol must be a number")
+ if(!is.numeric(maxIter)) stop("incorrect bootstrapMSSM arg: maxIter must be a number")
+ if( ("bootdata" %in% output) | ("MLests" %in% output) ) onlyAIC = FALSE # return parameter estimates
+ ### 2-7-09 Temporary backwards compatibility because I added new term to model object
+ if(is.null(model$estInteractions)) model$estInteractions=FALSE
+
numStates = model$m # number of subpopulations
numYears = model$numYrs # length of time series
numSites = model$n # number of time series
- # Calculate the sqrt of sigma matrices, so they don't have to be computed 5000+ times
- sigma.Sqrt = array(0,dim=c(numSites, numSites, numYears))
- BKS = array(0,dim=c(numStates, numSites, numYears))
- for(i in 1:numYears) {
- eig = eigen(model$Sigma[,,i]) # calculate inv sqrt(sigma[1])
- # EW added as.matrix 01/26/09
- sigma.Sqrt[,,i] = eig$vectors %*% makediag((sqrt(eig$values))) %*% t(eig$vectors)
- BKS[,,i] = model$B %*% model$Kt[,,i] %*% sigma.Sqrt[,,i] # this is m x n
+ sigma.Sqrt=NA; BKS=NA; eig=NA; #dummy values in case parametric = TRUE
+ errors = t(stdInnov(model$Sigma, model$Innov)) # standardized innovations; time going down columns
+ # Error-checking and warnings regarding regular versus parametric bootstrapping
+ if( ("AICb" %in% output | "Nonparam.AICb" %in% output ) & silent==FALSE & length(which(errors==0))!=0 ) {
+ cat("Alert: Dataset has missing values. AICb calculation will use parametric bootstrapping instead of regular bootstrapping.\n")
+ output = output[output != "Nonparam.AICb"] # Pull out Nonparam.AICb if user passed that in
+ output = output[output != "AICb"] # Pull out AICb if user passed that in and replace with Param.AICb
+ output=c(output,"Param.AICb") }
+ if( ("AICb" %in% output ) & parametric==TRUE & silent==FALSE & length(which(errors==0))==0) {
+ cat("Alert: AICb calculation uses nonparametric bootstrapping by default. Pass in Param.AICb to use parametric bootstrapping.\n")
+ output = output[output != "AICb"] # Pull out AICb and replace with Nonparam.AICb
+ output=c(output,"Nonparam.AICb") }
+ if(length(which(errors==0))!=0) parametric = TRUE # only can do nonparametric bootstrapping if there are no missing values
+
+ if(parametric == FALSE | ("Nonparam.AICb" %in% output) ){
+ # Calculate the sqrt of sigma matrices, so they don't have to be computed 5000+ times
+ sigma.Sqrt = array(0,dim=c(numSites, numSites, numYears))
+ BKS = array(0,dim=c(numStates, numSites, numYears))
+ for(i in 1:numYears) {
+ eig = eigen(model$Sigma[,,i]) # calculate inv sqrt(sigma[1])
+ sigma.Sqrt[,,i] = eig$vectors %*% makediag(sqrt(eig$values)) %*% t(eig$vectors)
+ BKS[,,i] = model$B %*% model$Kt[,,i] %*% sigma.Sqrt[,,i] # this is m x n
+ }
}
- U=NA; Q=NA; R=NA; A=NA; B=NA; states=NA; boot.data=NA;
+ if("Hessian.AICb" %in% output) {
+ if(silent==FALSE) cat("Computing the Hessian. This might take awhile.\n")
+ modelhess = emHessian(model)
+ paramsigma = solve(modelhess$Hessian)
+ parammean = vectorizemssm(model)
+ }
+
+ boot.params=NA; boot.states=NA; boot.data=NA; logL=NA #dummy values
# these are the arrays for output
- if(onlyAIC == FALSE) {
- boot.data = array(0,dim=c(dim(as.matrix(model$data)),nBoot))
- U = array(0,dim=c(dim(as.matrix(model$U)),nBoot))
- Q = array(0,dim=c(dim(as.matrix(model$Q)),nBoot))
- R = array(0,dim=c(dim(as.matrix(model$R)),nBoot))
- A = array(0,dim=c(dim(as.matrix(model$A)),nBoot))
- B = array(0,dim=c(dim(as.matrix(model$B)),nBoot))
- states = array(0,dim=c(dim(as.matrix(model$states)),nBoot))
+ if("bootdata" %in% output) boot.data = array(NA,dim=c(dim(as.matrix(model$data)),nBoot))
+ if("MLests" %in% output) {
+ boot.params=array(NA,dim=c(length(vectorizemssm(model)),nBoot))
+ boot.states = array(NA,dim=c(dim(as.matrix(model$states)),nBoot))
}
- logL.thetaStar = 0
- logL = 0;
- drawProgressBar = FALSE #If not time library, no prog bar
+ logL.thetaStar=0; logL.Hessian.AICb=0; logL.Param.AICb=0; logL.Nonparam.AICb=0; logL.AICi=0; logL2.AICi=0;
+ Hessian.AICb=NA; Param.AICb=NA; Nonparam.AICb=NA; AICi=NA; eqn8=NA; AICi.approx=NA; #so R knows about these
+ drawProgressBar = FALSE #If the time library is not installed, no prog bar
if(require(time)==TRUE) { #then we can draw a progress bar
prev = progressBar()
drawProgressBar = TRUE
}
+
for(i in 1:nBoot) {
- newData = makeNewData(errors, model, model$Sigma, sigma.Sqrt, BKS) # make new data
- # Take the ML output and make sure it's in the right form to be passed back in
- Rstart = diag(model$R)
- Qstart = diag(model$Q)
- # Step 2: estimate the MLEs, theta*
- boot.model = KalmanEM(newData, varcov.Q=model$varcov.Q, varcov.R=model$varcov.R, whichPop=model$whichPop,U.groups=model$U.groups, Q.groups=model$Q.groups,R.groups=model$R.groups,Uinit=model$U,Ainit=model$A,Qinit=Qstart,Rinit=Rstart,tol=tol,silent=T,max.iter=maxIter)
- if(onlyAIC == FALSE) {
- # Step 3: store the parameter estimates from the bootstraped data fitting
- boot.data[,,i] = as.matrix(newData)
- U[,,i] = as.matrix(boot.model$U)
- Q[,,i] = as.matrix(boot.model$Q)
- R[,,i] = as.matrix(boot.model$R)
- A[,,i] = as.matrix(boot.model$A)
- B[,,i] = as.matrix(boot.model$B)
- states[,,i] = as.matrix(boot.model$states)
- }
- logL[i] = Kfilter(numStates, numSites, numYears, boot.model$U, boot.model$Q, boot.model$A, boot.model$R, boot.model$B, boot.model$x00, boot.model$V00, boot.model$include, t(model$data))$loglik
+ if( ("MLests" %in% output) | ("Param.AICb" %in% output & parametric==TRUE) | ("Nonparam.AICb" %in% output & parametric==FALSE) | "bootdata" %in% output)
+ newData = makeNewData(errors, model, model$Sigma, sigma.Sqrt, BKS, parametric.bootstrapping=parametric ) # make new data
+
+ if("bootdata" %in% output) boot.data[,,i] = as.matrix(newData)
+
+ if( ("MLests" %in% output) | ("Param.AICb" %in% output & parametric==TRUE) | ("Nonparam.AICb" %in% output & parametric==FALSE) ) {
+ # estimate the MLEs (theta*) using the bootstrapped data
+ boot.model = KalmanEM(newData, varcov.Q=model$varcov.Q, varcov.R=model$varcov.R, whichPop=model$whichPop,U.groups=model$U.groups, Q.groups=model$Q.groups,R.groups=model$R.groups,Uinit=model$U,Ainit=model$A,Qinit=model$Q,Rinit=model$R, x00=model$x00, estInteractions=model$estInteractions, tol=tol,silent=TRUE,max.iter=maxIter)
+
+ if("MLests" %in% output ) {
+ # store the parameter estimates from the bootstraped data fitting
+ boot.params[,i]=vectorizemssm(boot.model)
+ boot.states[,,i] = as.matrix(boot.model$states) }
+
+ #************AICb calculations
+ # obtain the logL of theta* under the original data
+ if( "Param.AICb" %in% output & parametric==TRUE ) {
+ tmp.model=list( kf.data=model$kf.data, include=model$include, U=boot.model$U, Q=boot.model$Q, R=boot.model$R, A=boot.model$A, B=boot.model$B,
+ x00=boot.model$x00, V00=boot.model$V00 )
+ logL.Param.AICb[i] = Kfilter(tmp.model)$loglik }
+ if( "Nonparam.AICb" %in% output & parametric==FALSE) {
+ tmp.model=list( kf.data=model$kf.data, include=model$include, U=boot.model$U, Q=boot.model$Q, R=boot.model$R, A=boot.model$A, B=boot.model$B,
+ x00=boot.model$x00, V00=boot.model$V00 )
+ logL.Nonparam.AICb[i] = Kfilter(tmp.model)$loglik }
+ } #end if MLests or AICb in output
+
+ #in this case, newData (generated above was not parametrically bootstrapped, so have to create new data
+ if( ("Param.AICb" %in% output & parametric==FALSE) ) {
+ newData = makeNewData(errors, model, model$Sigma, sigma.Sqrt, BKS, parametric.bootstrapping=TRUE ) # make new data
+ boot.model = KalmanEM(newData, varcov.Q=model$varcov.Q, varcov.R=model$varcov.R, whichPop=model$whichPop,U.groups=model$U.groups, Q.groups=model$Q.groups,R.groups=model$R.groups,Uinit=model$U,Ainit=model$A,Qinit=model$Q,Rinit=model$R, x00=model$x00, estInteractions=model$estInteractions, tol=tol,silent=TRUE,max.iter=maxIter)
+ tmp.model=list( kf.data=model$kf.data, include=model$include, U=boot.model$U, Q=boot.model$Q, R=boot.model$R, A=boot.model$A, B=boot.model$B,
+ x00=boot.model$x00, V00=boot.model$V00 )
+ logL.Param.AICb[i] = Kfilter(tmp.model)$loglik
+ }
+ #in this case, newData (generated above was parametrically bootstrapped, so have to create new nonparametrically bootstrapped data
+ if( ("Nonparam.AICb" %in% output & parametric==TRUE) ) {
+ newData = makeNewData(errors, model, model$Sigma, sigma.Sqrt, BKS, parametric.bootstrapping=FALSE ) # make new data
+ boot.model = KalmanEM(newData, varcov.Q=model$varcov.Q, varcov.R=model$varcov.R, whichPop=model$whichPop,U.groups=model$U.groups, Q.groups=model$Q.groups,R.groups=model$R.groups,Uinit=model$U,Ainit=model$A,Qinit=model$Q,Rinit=model$R, x00=model$x00, estInteractions=model$estInteractions, tol=tol,silent=TRUE,max.iter=maxIter)
+ tmp.model=list( kf.data=model$kf.data, include=model$include, U=boot.model$U, Q=boot.model$Q, R=boot.model$R, A=boot.model$A, B=boot.model$B,
+ x00=boot.model$x00, V00=boot.model$V00 )
+ logL.Nonparam.AICb[i] = Kfilter(tmp.model)$loglik
+ }
+ if( "Hessian.AICb" %in% output ) {
+ hess.params=rmvnorm(1, mean=parammean, sigma=paramsigma, method="chol")
+ tmp.model=model
+ tmp.model=vectorizemssm(tmp.model,paramvector=hess.params)
+ kftry = try(Kfilter(tmp.model),silent=TRUE)
+ logL.Hessian.AICb[i] = ifelse(!inherits(kftry, "try-error"),kftry$loglik,NA)
+ }
+
+ #************AICi calculations
+ if("AICi" %in% output){
+ simData = rmvnorm(model$numYrs, mean = rep(0, nrow(as.matrix(model$R))), sigma = diag(nrow(as.matrix(model$R))), method="chol") #make 0,1
+ Astart = rep(0,numSites)
+ Rstart = rep(0.5,numSites)
+ Qstart = rep(0.5,numStates)
+ Ustart = rep(0, numStates)
+ x00start = rep(0, numStates)
+ # estimate the MLEs (theta*) using the 0,1 sim data
+ boot.model = KalmanEM(simData, varcov.Q=model$varcov.Q, varcov.R=model$varcov.R, whichPop=model$whichPop,U.groups=model$U.groups, Q.groups=model$Q.groups,R.groups=model$R.groups,Uinit=Ustart,Ainit=Astart,Qinit=Qstart,Rinit=Rstart, x00=x00start, estInteractions=model$estInteractions, tol=tol,silent=T,max.iter=maxIter)
+ # store the logL of theta* under the sim data
+ logL.AICi[i] = boot.model$loglike
+ # generate more sim data
+ simData2 = rmvnorm(model$numYrs, mean = rep(0, nrow(as.matrix(model$R))), sigma = diag(nrow(as.matrix(model$R))), method="chol") #make 0,1 data
+ # obtain the logL of theta* under the sim.star data
+ tmp.model=list( kf.data=t(simData2), include=boot.model$include, U=boot.model$U, Q=boot.model$Q, R=boot.model$R, A=boot.model$A, B=boot.model$B,
+ x00=boot.model$x00, V00=boot.model$V00 )
+ Kf.star = Kfilter(tmp.model)
+ logL2.AICi[i] = Kf.star$loglik
+
+ #B&C eqn 8
+ tmpt = 0 # just a temporary holder
+ for(t in 1:numYears) {
+ include.Y <- boot.model$include[,,t]
+ dim(include.Y) <- c(numSites,numStates) #stops R from changes the dimensions when m=1
+ include.Y2 <- (include.Y%*%array(1,dim=c(numStates,1))) # make an include.Y for A that is nx1
+ tmp = include.Y%*%Kf.star$xtt1[,t]+include.Y2*boot.model$A
+ Ftinv <- chol2inv(chol(boot.model$Sigma[,,t]))
+ tmpt = tmpt + t(tmp) %*% Ftinv %*% tmp + sum(diag(Ftinv))
+ }
+ eqn8[i] = tmpt
+ } #end AICi section
+
+ # Step 6: Draw the progress bar if silent=F and time library is installed
if(drawProgressBar) prev <- progressBar(i/nBoot,prev)
- }
+ } # end of for loop for nBoots
- AICb = -4*(sum(logL))/nBoot + 2*model$loglike
- return(list(AICb = AICb, logL.star=logL, U = U, Q = Q, R = R, A = A, B = B, states = states, boot.data=boot.data))
+ if( "Param.AICb" %in% output ) # Note that the AICb calc generally uses bootstrapped data not parametric data like AICi
+ Param.AICb = -4*(sum(logL.Param.AICb))/nBoot + 2*model$loglike # -2*model$loglike + 2*(1/N)*(-2)*sum(boot$Yloglike-model$loglike)
+ if( "Hessian.AICb" %in% output ) # Note that the AICb calc generally uses bootstrapped data not parametric data like AICi
+ Hessian.AICb = -4*mean(logL.Hessian.AICb,na.rm=TRUE) + 2*model$loglike # -2*model$loglike + 2*(1/N)*(-2)*sum(boot$Yloglike-model$loglike)
+ if( "Nonparam.AICb" %in% output ) # Note that the AICb calc generally uses bootstrapped data not parametric data like AICi
+ Nonparam.AICb = -4*(sum(logL.Nonparam.AICb))/nBoot + 2*model$loglike # -2*model$loglike + 2*(1/N)*(-2)*sum(boot$Yloglike-model$loglike)
+ if( "AICi" %in% output ) {
+ AICi = -2*model$loglike + (-2)*(1/nBoot)*sum(logL2.AICi - logL.AICi)
+ Tconstant = numYears*numSites
+ AICi.approx = -2 * model$loglike + (-1 * Tconstant + (1/nBoot)*sum( eqn8 ))
+ }
+
+ return(list(Hessian.AICb=Hessian.AICb, Nonparam.AICb = Nonparam.AICb, Param.AICb = Param.AICb, AICi=AICi, AICi.approx=AICi.approx, logL.Hessian.AICb=logL.Hessian.AICb, logL.Param.AICb=logL.Param.AICb, logL.Nonparam.AICb=logL.Nonparam.AICb, logL.AICi = logL.AICi, boot.params=boot.params, boot.states=boot.states, boot.data=boot.data))
}
-makeNewData = function(Errors, model, Sigma, sigmaSqrt, B.Kt.sqrtSigma) {
- # This function updated by EW and EH Dec 02, 2008
+###############################################################################################################################################
+# makeNewData
+###############################################################################################################################################
+makeNewData = function(Errors, model, Sigma, sigmaSqrt, B.Kt.sqrtSigma, parametric.bootstrapping=FALSE, return.state.process=FALSE ) {
# Errors = std innovations
- origData = model$data
-
newData = matrix(-99, model$numYrs, model$n)
newStates = matrix(-99, model$numYrs+1, model$m) # States = years x subpops
numMissingVals = length(which(Errors==0))
- if(numMissingVals == 0) {
+ if(numMissingVals == 0 & parametric.bootstrapping==FALSE) {
# Then the bootstrap algorithm proceeds
# Stoffer & Wall suggest not sampling from innovations 1-3 (not much data)
- minIndx = ifelse(model$numYrs > 5, 4, 1)
- samp = sample(seq(minIndx, model$numYrs), size = model$numYrs, replace = T)
- # EW added as.matrix() on 01/26/09
- e = as.matrix(Errors[samp,])
+ minIndx = ifelse(model$numYrs > 5, 3, 1)
+ samp = sample(seq(minIndx+1, model$numYrs), size = (model$numYrs-minIndx), replace = TRUE)
+ e = as.matrix(Errors)
+ e[1:minIndx,] = as.matrix(Errors[1:minIndx,])
+ e[(minIndx+1):model$numYrs,] = as.matrix(Errors[samp,])
# newStates is a[] in the writeup by EH
newStates[1,] = model$x00
#newStates[2,] = model$B %*% model$x00 + model$U + model$B %*% model$Kt[,,1] %*% sigmaSqrt[,,1] %*% e[1,]
- newStates[2,] = model$B %*% model$x00 + model$U + B.Kt.sqrtSigma[,,1] %*% e[1,]
- newData[1,] = model$x00[model$whichPop] + model$A + sigmaSqrt[,,1] %*% e[1,] # eqn 1.7 in writeup by EH
+ newStates[2,] = model$B %*% newStates[1,] + model$U + B.Kt.sqrtSigma[,,1] %*% e[1,]
+ newData[1,] = newStates[1,][model$whichPop] + model$A + sigmaSqrt[,,1] %*% e[1,] # eqn 1.7 in writeup by EH
for(i in 2:model$numYrs) { # Deal with years 2:T, use sigma, Kt, B, U, A
newStates[i+1,] = model$B %*% newStates[i,] + model$U + B.Kt.sqrtSigma[,,i] %*% e[i,]
newData[i,] = newStates[i,][model$whichPop] + model$A + sigmaSqrt[,,i] %*% e[i,]
}
- newData[which(e == 0)]=-99 #newData is going to be used in the KalmanEM function which expects -99 for missing values
- # return the new data
}
- if(numMissingVals != 0) {
+ if(numMissingVals != 0 | parametric.bootstrapping==TRUE) {
+ ###### Check that package mvtnorm is installed and load it if it isn't already loaded
+ if(require(mvtnorm)==FALSE) {
+ str1 = "The makeNewData functions require the package mvtnorm when doing parametric bootstrapping and you haven't installed it yet.\n"
+ str2 = "Install the package before continuing by\n"
+ str3 = "going to the Packages tab and selecting install (if working from the R GUI).\n"
+ cat(paste(str1,str2,str3,sep=""))
+ stop("the mvtnorm package is not installed")
+ }
newStates[1,] = model$x00 # t = 0
# create a matrix of RVs for observation error
obs.error = rmvnorm(model$numYrs, mean = rep(0, nrow(as.matrix(model$R))), sigma = model$R, method="chol")
@@ -677,12 +1077,16 @@
newStates[i,] = model$B %*% newStates[i-1,] + model$U + pro.error[i-1,]
newData[i-1,] = model$Z %*% newStates[i,] + model$A + obs.error[i-1,]
}
- newData[which(Errors==0)]=-99
+ newData[which(Errors==0)]=-99 #newData is going to be used in the KalmanEM function which expects -99 for missing values
}
+if(return.state.process==TRUE) return(newStates)
+else return(newData)
- return(newData)
}
+###############################################################################################################################################
+# stdInnov
+###############################################################################################################################################
stdInnov = function(SIGMA, INNOV) {
# This function added by EW Nov 3, 2008
# This function is only called once by bootstrapMSSM()
@@ -692,8 +1096,7 @@
SI = matrix(0, numSites, numYrs)
for(i in 1:numYrs) {
a = SIGMA[,,i]
- a.eig = eigen(a)
- # EW added as.matrix() on 01/26/09 for single trajectory model
+ a.eig = eigen(a)
a.sqrt = a.eig$vectors %*% makediag((sqrt(a.eig$values))) %*% solve(a.eig$vectors)
SI[,i] = chol2inv(chol(a.sqrt)) %*% INNOV[,i] # eqn 1, p359 S&S
}
@@ -701,3 +1104,162 @@
return(SI)
}
+#######################################################################################################
+# vectorizemssm
+#######################################################################################################
+vectorizemssm = function(mssm.model, paramvector=NA) {
+#This helper function
+#if paramvector=NA) returns a vector version of all the estimated parameters (for use in say optim) from a mssm model
+#if paramvector is passed in) returns a mssm object with the parameters fixed by paramvector
+
+#attach would be risky here since user might have one of these variables in their workspace
+U=mssm.model$U; Q=mssm.model$Q; R=mssm.model$R; A=mssm.model$A; B=mssm.model$B; x00=mssm.model$x00;
+Q.groups=mssm.model$Q.groups; R.groups=mssm.model$R.groups; U.groups=mssm.model$U.groups
+varcov.Q=mssm.model$varcov.Q; varcov.R=mssm.model$varcov.R
+n=mssm.model$n; m=mssm.model$m; whichPop=mssm.model$whichPop
+if(length(paramvector)==1 & is.na(paramvector[1])) {
+paramvec.names=c()
+ #x00
+ if(mssm.model$x00prior==FALSE) x00.params=as.vector(x00) else x00.params=c()
+ paramvec.names=c(paramvec.names,rep("x00",length(x00.params)))
+ #B
+ if(mssm.model$estInteractions==TRUE) stop("Error in emHessian: Hessian when B estimated is not coded in yet")
+ #U
+ U.numGroups = max(U.groups)
+ ZU = matrix(0,m,U.numGroups) # matrix to allow shared growth rates
+ for(i in 1:U.numGroups) ZU[which(U.groups == i),i] <- 1
+ U.params = as.vector(t(ZU)%*%U) / colSums(ZU)
+ paramvec.names=c(paramvec.names,rep("U",length(U.params)))
+ #Q
+ Q.numGroups = max(Q.groups)
+ ZQ = matrix(0,m,Q.numGroups) # matrix to allow shared process variances
+ for(i in 1:Q.numGroups) ZQ[which(Q.groups==i),i] = 1
+ if(varcov.Q=="diagonal") Q.params = as.vector(t(ZQ)%*%array(takediag(Q),dim=c(m,1))) / colSums(ZQ)
+ if(varcov.Q=="equalvarcov") Q.params = c(Q[1,1],Q[1,2])
+ if(varcov.Q=="unconstrained") Q.params = as.vector(Q)
+ if(varcov.Q=="equalvarcov.groups") stop("varcov.Q=equalvarcov.groups is not coded in yet")
+ paramvec.names=c(paramvec.names,rep("Q",length(Q.params)))
+ #R
+ R.numGroups = max(R.groups)
+ ZR <- matrix(0,n,R.numGroups) # matrix to allow shared measurement errs
+ for(i in 1:R.numGroups) ZR[which(R.groups==i),i] <- 1
+ if(varcov.R=="diagonal") R.params = as.vector(t(ZR)%*%array(takediag(R),dim=c(n,1))) / colSums(ZR)
+ if(varcov.R=="equalvarcov") R.params = c(R[1,1],R[1,2])
+ if(varcov.R=="unconstrained") R.params=as.vector(R)
+ paramvec.names=c(paramvec.names,rep("R",length(R.params)))
+ #A
+ A.params=A[which(A!=0)]
+ paramvec.names=c(paramvec.names,rep("A",length(A.params)))
+ paramvector = c(x00.params, U.params, Q.params, R.params, A.params)
+ names(paramvector)=paramvec.names
+ return(paramvector)
+ } # end if paramvector==NA
+
+else {
+ names(paramvector)=NULL #don't want any new names lurking about
+ m=mssm.model$m; n=mssm.model$n; whichPop=mssm.model$whichPop
+ U.numGroups = max(mssm.model$U.groups)
+ Q.numGroups = max(mssm.model$Q.groups)
+ R.numGroups = max(mssm.model$R.groups)
+ ZU = matrix(0,m,U.numGroups); for(i in 1:U.numGroups) ZU[which(U.groups == i),i] = 1
+ ZQ = matrix(0,m,Q.numGroups); for(i in 1:Q.numGroups) ZQ[which(Q.groups==i),i] = 1
+ ZR = matrix(0,n,R.numGroups); for(i in 1:R.numGroups) ZR[which(R.groups==i),i] = 1
+
+ #x00
+ if(mssm.model$x00prior==FALSE) {
+ mssm.model$x00=array(paramvector[1:m],dim=c(m,1))
+ paramvector=paramvector[(m+1):length(paramvector)]
+ }
+ #U
+ mssm.model$U = ZU%*%array(paramvector[1:U.numGroups],dim=c(U.numGroups,1))
+ paramvector=paramvector[(U.numGroups+1):length(paramvector)]
+ #Q
+ Q.numGroups = dim(ZQ)[2]
+ if(mssm.model$varcov.Q=="diagonal") {
+ mssm.model$Q = makediag(ZQ%*%array(paramvector[1:Q.numGroups],dim=c(Q.numGroups,1)))
+ paramvector=paramvector[(Q.numGroups+1):length(paramvector)] }
+ if(mssm.model$varcov.Q=="unconstrained") {
+ mssm.model$Q = array(paramvector[1:(m*m)],dim=c(m,m))
+ paramvector=paramvector[(m*m+1):length(paramvector)] }
+ if(mssm.model$varcov.Q=="equalvarcov") {
+ mssm.model$Q = diag(paramvector[1],m,m)+array(paramvector[2],dim=c(m,m))-diag(paramvector[2],m,m)
+ paramvector=paramvector[3:length(paramvector)] }
+ #R
+ R.numGroups = dim(ZR)[2]
+ if(mssm.model$varcov.R=="diagonal") {
+ mssm.model$R = makediag(ZR%*%array(paramvector[1:R.numGroups],dim=c(R.numGroups,1)))
+ paramvector=paramvector[(R.numGroups+1):length(paramvector)] }
+ if(mssm.model$varcov.R=="unconstrained") {
+ mssm.model$R = array(paramvector,dim(n,n))
+ paramvector=paramvector[(n*n+1):length(paramvector)] }
+ if(mssm.model$varcov.R=="equalvarcov") {
+ mssm.model$R = diag(paramvector[1],n,n)+array(paramvector[2],dim=c(n,n))-diag(paramvector[2],n,n)
+ paramvector=paramvector[3:length(paramvector)] }
+ #A
+ A=array(0,dim=c(n,1))
+ tableA <- as.numeric(table(whichPop)) #figure out which state processes have more than 1 observation time series
+ numSubsWith2Sites = which(tableA > 1)
+ if(length(numSubsWith2Sites) > 0) {
+ for(subpops in 1:length(numSubsWith2Sites)) {
+ this.sub = numSubsWith2Sites[subpops]
+ this.n = tableA[this.sub]
+ this.index = which(whichPop==this.sub)
+ As = c(0,paramvector[1:(this.n-1)])
+ paramvector=paramvector[this.n:length(paramvector)]
+ A[this.index] = As
+ }
+ mssm.model$A=A
+ }
+ return(mssm.model)
+} #end if paramvector arg is passed in
+}
+
+#######################################################################################################
+# emHessian
+#######################################################################################################
+emHessian = function(mssm.model) {
+#this function expects a mssm model as output by KalmanEM
+if(require(nlme)==FALSE) {
+ str1 = "The emHessian function requires the package nlme and you haven't installed it yet.\n"
+ str2 = "Install the package before continuing by\n"
+ str3 = "going to the Packages tab and selecting install (if working from the R GUI).\n"
+ cat(paste(str1,str2,str3,sep=""))
+ stop("Error: missing required nlme package")
+}
+paramvector=vectorizemssm(mssm.model)
+
+kfNLL=function(paramvec, model){
+ new.model=vectorizemssm(model, paramvector=paramvec)
+ kf = Kfilter(new.model)
+ return(-kf$loglik)
+ }
+
+#Hessian and gradient
+emhess=fdHess(paramvector, function(paramvector, mssm.model) kfNLL(paramvector, mssm.model), mssm.model)
+mssm.model$Hessian=emhess$Hessian
+mssm.model$gradient=emhess$gradient
+
+#TIC
+J0=array(emhess$gradient,dim=c(length(paramvector),1))%*%array(emhess$gradient,dim=c(1,length(paramvector)))
+I0=emhess$Hessian
+TIC = try(-2*mssm.model$loglike+2*sum(diag(J0%*%solve(I0))))
+mssm.model$TIC = ifelse(!inherits(TIC, "try-error"),TIC,NA)
+
+#standard errors
+stderr=try(sqrt(diag(solve(emhess$Hessian)))) #invert the Hessian; wrap in a try() in case it fails
+stderr=ifelse(!inherits(stderr, "try-error"),stderr,rep(NA,length(paramvector)))
+stderr.model=vectorizemssm(mssm.model,paramvector=stderr)
+if(mssm.model$x00prior==TRUE) {stderr.model$x00=NA; stderr.model$V00=NA}
+if(mssm.model$x00prior==FALSE) stderr.model$V00=NA
+if(mssm.model$estInteractions==FALSE) stderr.model$B=NA
+if(mssm.model$estInteractions==TRUE) stderr.model$B="Hessian when B estimated is not coded in yet"
+mssm.model$A.stderr=stderr.model$A
+mssm.model$B.stderr=stderr.model$B
+mssm.model$U.stderr=stderr.model$U
+mssm.model$Q.stderr=stderr.model$Q
+mssm.model$R.stderr=stderr.model$R
+mssm.model$x00.stderr=stderr.model$x00
+mssm.model$V00.stderr=stderr.model$V00
+
+return(mssm.model)
+}
\ No newline at end of file