/*-------------------------------------------------------------------------------
  Description:  Estimates ADL model based on MD-OLS, MD-TSLS, and MD-GMM esimator
  -------------------------------------------------------------------------------
  Inputs: 
    Data: the data for estimation.
    Choose the value of M for multiple-differencing.
    Choose the instrumental variable (IV) for conducting the MD-TSLS and MD-GMM. 
  -------------------------------------------------------------------------------
  Output:
    The estimate and t ratio for each coefficent of regressor.
 --------------------------------------------------------------------------------
  Author: Wen-Jen Tsay (March 17, 2009)
 --------------------------------------------------------------------------------
  Example: the data is from DGW (1997)  
    The difference beteen this example and that of De Boef and Keele (2008) is 
    that my data for OLS is only 78, while it is 79 for their paper.  
    The results are presented with the scalar ADL model in Table 8 of 
    Tsay (2009). 
 --------------------------------------------------------------------------------
  
*/


new;

library pgraph;

re = 1; 

output file = code.out reset;

t_begin=hsec;


load data[80,20] = dgw1.dat; /* the data from DGW (1997) */

capp = data[.,3];
newapp = data[.,4];
econexp = data[.,5];
nytavg = data[.,6];
kg = data[.,10];
hb = data[.,14];
vetoes = data[.,16];
override = data[.,17];
intrasum = data[.,18];
mbills=data[.,19];


/* here I use M=0 to deal with the level data */

y = capp[3:80]; /* dependent variable */
y_1 = capp[2:79]; /* lagged dependent variable */


/* constant and other regressors omitted in De Boef and Keele (2008): Model 1: p.197 */
xhat = ones(78,1)~kg[3:80]~kg[2:79]~hb[3:80]~hb[2:79];


/* here is to partial out the impacts of xhat on dependent variable and other regressors */
yadl = y - xhat*invpd(xhat'xhat)*xhat'y;
y_1adl = y_1 - xhat*invpd(xhat'xhat)*xhat'y_1;
econexpadl = econexp[3:80] - xhat*invpd(xhat'xhat)*xhat'econexp[3:80];
econexp_1adl = econexp[2:79] - xhat*invpd(xhat'xhat)*xhat'econexp[2:79];
econexp_2adl = econexp[1:78] - xhat*invpd(xhat'xhat)*xhat'econexp[1:78];

nytavgadl = nytavg[3:80] - xhat*invpd(xhat'xhat)*xhat'nytavg[3:80];
nytavg_1adl = nytavg[2:79] - xhat*invpd(xhat'xhat)*xhat'nytavg[2:79];
nytavg_2adl = nytavg[1:78] - xhat*invpd(xhat'xhat)*xhat'nytavg[1:78];


newappadl = newapp[3:80] - xhat*invpd(xhat'xhat)*xhat'newapp[3:80];
newapp_1adl = newapp[2:79] - xhat*invpd(xhat'xhat)*xhat'newapp[2:79];
newapp_2adl = newapp[1:78] - xhat*invpd(xhat'xhat)*xhat'newapp[1:78];

vetoesadl = vetoes[3:80] - xhat*invpd(xhat'xhat)*xhat'vetoes[3:80];
vetoes_1adl = vetoes[2:79] - xhat*invpd(xhat'xhat)*xhat'vetoes[2:79];
vetoes_2adl = vetoes[1:78] - xhat*invpd(xhat'xhat)*xhat'vetoes[1:78];


overadl = override[3:80] -  xhat*invpd(xhat'xhat)*xhat'override[3:80];
over_1adl = override[2:79] -  xhat*invpd(xhat'xhat)*xhat'override[2:79];
over_2adl = override[1:78] -  xhat*invpd(xhat'xhat)*xhat'override[1:78];

intraadl = intrasum[3:80] -  xhat*invpd(xhat'xhat)*xhat'intrasum[3:80];
intra_1adl = intrasum[2:79] -  xhat*invpd(xhat'xhat)*xhat'intrasum[2:79];
intra_2adl = intrasum[1:78] -  xhat*invpd(xhat'xhat)*xhat'intrasum[1:78];


mbillsadl = mbills[3:80] -  xhat*invpd(xhat'xhat)*xhat'mbills[3:80];
mbills_1adl = mbills[2:79] -  xhat*invpd(xhat'xhat)*xhat'mbills[2:79];
mbills_2adl = mbills[1:78] -  xhat*invpd(xhat'xhat)*xhat'mbills[1:78];


t = rows(yadl);
c=ones(t,1);


/* demeaned lagged dependent varialbe and regressor: loss one observation */
yadl = yadl - meanc(yadl);
y_1adl = y_1adl - meanc(y_1adl);

newappadl = newappadl- meanc(newappadl);
newapp_1adl = newapp_1adl - meanc(newapp_1adl);

econexpadl = econexpadl - meanc(econexpadl);
econexp_1adl = econexp_1adl - meanc(econexp_1adl);
econexp_2adl = econexp_2adl - meanc(econexp_2adl);


nytavgadl = nytavgadl - meanc(nytavgadl);
nytavg_1adl = nytavg_1adl - meanc(nytavg_1adl);
nytavg_2adl = nytavg_2adl - meanc(nytavg_2adl);

vetoesadl = vetoesadl - meanc(vetoesadl);
vetoes_1adl = vetoes_1adl - meanc(vetoes_1adl);
vetoes_2adl = vetoes_2adl - meanc(vetoes_2adl);

overadl = overadl - meanc(overadl);
over_1adl = over_1adl - meanc(over_1adl);
over_2adl = over_2adl - meanc(over_2adl);


intraadl = intraadl - meanc(intraadl);
intra_1adl = intra_1adl - meanc(intra_1adl);
intra_2adl = intra_2adl - meanc(intra_2adl);


mbillsadl = mbillsadl - meanc(mbillsadl);
mbills_1adl = mbills_1adl - meanc(mbills_1adl);
mbills_2adl = mbills_2adl - meanc(mbills_2adl);

/* regressors shown in the paper */
xx = y_1adl~newappadl~newapp_1adl~econexpadl~econexp_1adl
~nytavgadl~nytavg_1adl~vetoesadl~vetoes_1adl~overadl~over_1adl~intraadl~intra_1adl~mbillsadl~mbills_1adl;

nk = cols(xx);

/* demeaned dependent variable */
yy = yadl;

/* OLS estimator */
beta1 = (invpd(xx'xx)*
                    xx'yy
            )';


eols= yy-(xx)*(beta1');
tols = rows(eols);


vbols = (eols'eols)/(tols-5-cols(xx));
volshat = invpd(xx'xx)*vbols;

volshat1 = (sqrt(diag(volshat)))';
olstest = (beta1') ./(volshat1');


/* IV regressor */

/* using IV to correct the endogeneity of presidental approval, econexp */
/* the first five variables (lagged two period) are the IV */

ivx = nytavg_2adl~vetoes_2adl~over_2adl~intra_2adl~mbills_2adl 
~nytavgadl~nytavg_1adl~vetoesadl~vetoes_1adl~overadl~over_1adl~intraadl~intra_1adl~mbillsadl~mbills_1adl;


npar = cols(ivx); /* depend on IV number */

/* Two-stage-least-squares estimator */
pz = ivx*invpd(ivx'ivx)*(ivx');
ivbeta1 = (
                 invpd(xx'pz*xx)*
                    (xx'pz*yy)
               )';


ei= yy-(xx)*(ivbeta1');
vb = lrvarrob(ivx,ei);
te = rows(ei);

/* long run variance estima using robinson, 1998 */

vhat =

           invpd( (xx'ivx/te)*invpd(ivx'ivx/te)*(ivx'xx/te)) *
           (xx'ivx/te)*invpd(ivx'ivx/te)
           *(vb)*
           (
           invpd( (xx'ivx/te)*invpd(ivx'ivx/te)*(ivx'xx/te)) *
           (xx'ivx/te)*invpd(ivx'ivx/te)
           )'

;

ivhat1 = (sqrt(diag(vhat)))';
ivtest = sqrt(te)*(ivbeta1') ./(ivhat1');


/* GMM estimator */

gmmbeta1 = (
                 invpd( xx'ivx*invpd(vb)*ivx'xx )*
                      ( xx'ivx*invpd(vb)*ivx'yy )
               )';


gmmei= yy-(xx)*(gmmbeta1');

jtest = te*(ivx'gmmei/te)'*invpd(vb)*(ivx'gmmei/te);

gmmhat =  invpd( (xx'ivx/te)*invpd(vb)*(ivx'xx /te) );

gmmhat1 = (sqrt(diag(gmmhat)))';
gmmtest = sqrt(te)*(gmmbeta1') ./(gmmhat1');


format /rd 8,2;

output on;
?;


"Replications:     " re;
"Sample Size is     " rows(yadl);
"Remaining size is  " rows(yy);
"OLS                " beta1;
"inference OLS      " olstest';
"size for TSLS      " te;
"TSLS               " ivbeta1;
"inference TSLS     " ivtest';
"GMM                " gmmbeta1;
"inference GMM      " gmmtest';


format /rd 8,8;

output on;
"jtest is           " jtest;


output off;


proc (1) = lrvarols(u,v);
local tt,gammap,i,gamma0,gamman,j,lrvar;

tt = rows(v);

gammap = zeros(nk,nk);
i = 1;do until i > tt-1;


gammap = gammap + ((v[1:tt-i,.]'v[1+i:tt,.]/tt)) .*.
                  ((u[1:tt-i,.]'u[1+i:tt,.]/tt));

i = i+1;endo;

gamma0 = (v'v/tt) .*. (u'u/tt);

gamman = zeros(nk,nk);
j = 1;do until j> tt-1;


gamman = gamman + (v[j+1:tt,.]'v[1:tt-j,.]/(tt)) .*.
                  (u[j+1:tt,.]'u[1:tt-j,.]/(tt));

j = j+1;endo;


lrvar = (gammap+gamma0+gamman);

retp(lrvar);
endp;


output off;


t_used=hsec-t_begin;

a = t_used/100;
output on;
"time used";
print a;


end;

proc (1) = lrvarrob(u,v);
local tt,gammap,i,gamma0,gamman,j,lrvar;

tt = rows(v);

gammap = zeros(npar,npar);
i = 1;do until i > tt-1;


gammap = gammap + ((v[1:tt-i,.]'v[1+i:tt,.]/tt)) .*.
                  ((u[1:tt-i,.]'u[1+i:tt,.]/tt));

i = i+1;endo;

gamma0 = (v'v/tt) .*. (u'u/tt);

gamman = zeros(npar,npar);
j = 1;do until j> tt-1;


gamman = gamman + (v[j+1:tt,.]'v[1:tt-j,.]/(tt)) .*.
                  (u[j+1:tt,.]'u[1:tt-j,.]/(tt));

j = j+1;endo;


lrvar = (gammap+gamma0+gamman);

retp(lrvar);
endp;