# R functions for # Marginal Regression Analysis of Longitudinal Data with Time-Dependent Covariates: # A Generalised Method of Moments Approach # by Tze Leung Lai and Dylan Small ###################################### ### These functions compute the two step GMM estimator for a given ### classification of time varying covariates into Type II and Type III ### covariates. The user should first use the function validmomcondticovfunc to### calculate the valid moment conditions for the specification of the ### time-varying covariates into Type II and Type III and then use the function ### twostepgmmticovfun to calculate the two step GMM estimator and its ### variance. ### These functions assume that all units are observed at T time points ##################################### # This function computes the valid moment conditions under a classification # of time varying covariates into Type II and Type III. # In tpcovexogvector, a 0 indicates a Type III time-varying covariate and # a 1 indicates indicates a Type II time-varying covariate validmomcondticovfunc=function(tp,tvcovexogvector,noticov){ notvcov=length(tvcovexogvector); momsel=rep(0,notvcov*(tp^2)+tp+tp*noticov); count=1; for(p in 1:notvcov){ for(i in 1:tp){ for(j in 1:tp){ if(tvcovexogvector[p]==0){ if(i==j){ momsel[count]=1; } } if(tvcovexogvector[p]==1){ if(i>=j){ momsel[count]=1; } } count=count+1; } } } momsel[(notvcov*tp^2+1):(notvcov*tp^2+tp+tp*noticov)]=rep(1,tp*(noticov+1)); momsel; } # Function to calculate two-step GMM estimator # The momsel vector should be obtained from the function validmomcondticovfunc. # ymat should be arranged as a NxT matrix with each row giving a unit's # outcomes # Let p=number of time varying covariates. # xmat should be an Nx(T*p) matrix where the first T columns of the row are # the unit's first time varying covariate for times 1,...,T, the (T+1):(2*T) # are the unit's second time-varying covariate for times 1,...,T,... # zmat should be an N*(number of time-invariant covariates) matrix where # each row contains a unit's time-invariant covariates twostepgmmticovfunc=function(ymat,xmat,zmat,momsel){ momsel=(momsel==1); n=nrow(ymat); tp=ncol(ymat); notvcov=ncol(xmat)/tp; noticov=ncol(zmat); matgmmfull=matgmmticovfunc(ymat,xmat,zmat); Gsel=matgmmfull$G[momsel,]; Hsel=matgmmfull$H[momsel,]; #Obtain initial estimate using working independence yvector=as.vector(ymat); zvectorexp=matrix(rep(0,tp*n*noticov),ncol=noticov); for(i in 1:noticov){ for(j in 1:tp){ zvectorexp[((j-1)*n+1):(j*n),i]=as.vector(zmat[,i]); } } xvectorexp=matrix(rep(0,tp*n*notvcov),ncol=notvcov); for(i in 1:notvcov){ xvectorexp[,i]=as.vector(xmat[,((i-1)*tp+1):(i*tp)]); } betahatinit=coef(lm(yvector~xvectorexp+zvectorexp)); # Obtain weight matrix based on initial estimate mominitfull=momgmmticovfunc(ymat,xmat,zmat,betahatinit); mominitsel=mominitfull[,momsel]; weightmat=ginv(var(mominitsel)); # GMM estimate betahat=solve(t(Gsel)%*%weightmat%*%Gsel)%*%t(Gsel)%*%weightmat%*%Hsel; # Overidentifying restrictions test overidstat=(1/n)*t(Hsel-Gsel%*%betahat)%*%weightmat%*%(Hsel-Gsel%*%betahat); # Asymptotic variance estimate asycovest=n*solve(t(Gsel)%*%weightmat%*%Gsel); list(betahat=betahat,overidstat=overidstat,asycovest=asycovest); } ############################################################## ## These are functions that are needed for twostepgmmticovfunc ## and need to be read into R before using twostepgmmticovfunc ############################################################## # GMM functions that allow for time-invariant covariates # and also allow for more than one time-varying covariate # The coefficients are intercept, time varying covariates, time invariant # covariates # Moments can be expressed as H-G*[beta0; beta1; beta2...] # Data is assumed to be in nxtp matrices # xmat should be an n*(notvcov*tp) matrix and the first time varying # covariate should be in the first tp columns, the second time varying # covariate should be in the second tp columns and so on. matgmmticovfunc=function(ymat,xmat,zmat){ n=nrow(ymat); tp=ncol(ymat); notvcov=ncol(xmat)/tp; noticov=ncol(zmat); nomoms=tp^2*notvcov+tp+noticov*tp; H=matrix(rep(0,nomoms),ncol=1); G=matrix(rep(0,(notvcov+noticov+1)*nomoms),ncol=(notvcov+noticov+1)); count=1; for(p in 1:notvcov){ for(i in 1:tp){ for(j in 1:tp){ H[count]=sum(xmat[,(p-1)*tp+i]*ymat[,j]); G[count,1]=sum(xmat[,(p-1)*tp+i]); for(m in 1:notvcov){ G[count,1+m]=sum(xmat[,(p-1)*tp+i]*xmat[,(m-1)*tp+j]); } for(k in 1:noticov){ G[count,(1+notvcov+k)]=sum(xmat[,(p-1)*tp+i]*zmat[,k]); } count=count+1; } } } for(i in 1:tp){ H[count]=sum(ymat[,i]); G[count,1]=n; for(m in 1:notvcov){ G[count,(1+m)]=sum(xmat[,(m-1)*tp+i]); } for(k in 1:noticov){ G[count,(1+notvcov+k)]=sum(zmat[,k]); } count=count+1; } for(i in 1:noticov){ for(j in 1:tp){ H[count]=sum(zmat[,i]*ymat[,j]); G[count,1]=sum(zmat[,i]); for(m in 1:notvcov){ G[count,(1+m)]=sum(zmat[,i]*xmat[,(m-1)*tp+j]); } for(k in 1:noticov){ G[count,(1+notvcov+k)]=sum(zmat[,i]*zmat[,k]); } } count=count+1; } list(H=H,G=G); } # Calculate moment conditions for each subject at beta momgmmticovfunc=function(ymat,xmat,zmat,beta){ n=nrow(ymat); tp=ncol(ymat); notvcov=ncol(xmat)/tp; noticov=ncol(zmat); nomoms=tp^2*notvcov+tp+noticov*tp; mommat=matrix(rep(0,n*nomoms),ncol=nomoms); # Calculate residuals yvector=as.vector(ymat); xmatvector=matrix(rep(0,notvcov*n*tp),ncol=notvcov); for(m in 1:notvcov){ xmatvector[,m]=as.vector(xmat[,((m-1)*tp+1):(m*tp)]); } zmatexpand=matrix(rep(0,n*noticov*tp),ncol=noticov); count=1; for(i in 1:tp){ zmatexpand[count:(count+n-1),]=zmat; count=count+n; } covmat=cbind(rep(1,n*tp),xmatvector,zmatexpand); resid=yvector-covmat%*%beta; residmat=matrix(resid,ncol=tp); count=1; for(p in 1:notvcov){ for(i in 1:tp){ for(j in 1:tp){ mommat[,count]=xmat[,(p-1)*tp+i]*residmat[,j]; count=count+1; } } } for(i in 1:tp){ mommat[,count]=residmat[,i]; count=count+1; } for(i in 1:noticov){ for(j in 1:tp){ mommat[,count]=zmat[,i]*residmat[,j]; count=count+1; } } mommat; }