$ontext Bootstrap GME model Copyright 2007 Erwin Kalvelagen, Amsterdam Optimization References: [1] Maximum entropy estimation in economic models with linear inequality restrictions, Randall C. Campbell, R. Carter Hill Department of Economics, Louisiana State University, Baton Rouge, LA 70803,USA, 2001 [2] Ramanathan, R., 2002. Introductory econometrics with applications (Harcourt College Publishers, Fort Worth). [3] Bardley Efron, Robert J. Tibshirani, "An Introduction to the Bootstrap", Chapman & Hall, 1993 Files: bootstrap-gme.gms -- gams file poverty.inc -- data file tc.inc (optional) -- file with quantiles for Student t distribution $offtext set i /case1*case116/ k0 / constant 'constant term in regression' povrate 'poverty level' urb 'not used' famsize 'average household size' unemp 'percentage unemployment rate' highschl 'percentage of population age 25 and over with high school degree only' college 'percentage of population age 25 and over that completed 4 or more years of college' medinc 'median household income' D90 'dummy: one for the 1990 Census and zero for the 1980 Census' / k1(k0) 'data used' /povrate,famsize,unemp,highschl,college,medinc,D90/ ; *------------------------------------------------------------------------------ * input data from Ramanthan (2002, Data 7-6, p. 653) *------------------------------------------------------------------------------ table data(i,k0) '-- input data from Ramanthan [2] (2002, Data 7-6, p. 653)' $include poverty.inc ; display data; *------------------------------------------------------------------------------ * critical values t distribution * this table is available from amsterdamoptimization.com * if not available you can use a normal approximation *------------------------------------------------------------------------------ $if exist tc.inc $include tc.inc $if defined tc $goto tcok * we have no access to tc.inc so we use a normal approximation set prob /p1,p2,p3,p4,p5,p6/; parameter probval(prob) / p1 0.10, p2 0.05, p3 0.025, p4 0.01, p5 0.005, p6 0.001 /; parameter qnorm(prob) / p1 1.281552, p2 1.644854, p3 1.959964, p4 2.326348, p5 2.575829, p6 3.090232 /; $label tcok *------------------------------------------------------------------------------ * create some summary statistics *------------------------------------------------------------------------------ parameter summary(k1,*) '-- summary statistics (table 1 in [1])'; summary(k1,'mean') = sum(i,data(i,k1))/card(i); summary(k1,'min') = smin(i,data(i,k1)); summary(k1,'max') = smax(i,data(i,k1)); summary(k1,'stdev') = sqrt[sum(i,sqr[data(i,k1)-summary(k1,'mean')])/(card(i)-1)]; summary(k1,'coeffvar') = summary(k1,'stdev')/summary(k1,'mean'); display summary; *------------------------------------------------------------------------------ * linear regression using OLS *------------------------------------------------------------------------------ set k(k0) 'regression coefficients' /constant,famsize,unemp,highschl,college,medinc,D90/; data(i,'constant') = 1; variables beta(k) 'regression coefficients' sse 'sum of squared errors' ; equations dummyobj 'dummy objective' fit(i) 'fit linear equations' ; dummyobj.. sse =n= 0; fit(i).. data(i,'povrate') =e= sum(k, beta(k)*data(i,k)); model ols /dummyobj,fit/; option lp=ls; ols.limrow = 0; ols.limcol = 0; ols.solprint = 0; solve ols using lp minimizing sse; * Note: compare OLS results to page 8 in [1] parameter estim(k,*,*) 'estimates'; estim(k,'OLS','-') = beta.l(k); *------------------------------------------------------------------------------ * Calculate OLS confidence intervals *------------------------------------------------------------------------------ parameter ols_se(k) 'standard errors'; ols_se(k) = beta.m(k); display ols_se; set ival 'confidence interval' /lo,up/; scalar ndf 'degrees of freedom'; ndf = card(i) - card(k); scalar alpha 'significance level' /0.025/; scalar qt 'critical value'; $if defined tc qt = sum((df,prob)$(dfval(df)=ndf and probval(prob)=0.025), tc(df,prob)); $if not defined tc abort$(ndf<30) "Normal approximation not valid"; $if not defined tc qt = sum(prob$(probval(prob)=0.025), qnorm(prob)); display ndf,alpha,qt; parameter ols_conf_ival(k,ival); ols_conf_ival(k,'lo') = beta.l(k) - qt*ols_se(k); ols_conf_ival(k,'up') = beta.l(k) + qt*ols_se(k); display ols_conf_ival; * Note: these can also be imported from the GDX file LS.GDX * See http://amsterdamoptimization.com/pdf/nlregression.pdf *------------------------------------------------------------------------------ * GME model *------------------------------------------------------------------------------ set j 'support points' /j1*j5/; variables p(k,j), w(i,j); p.lo(k,j) = 0.0001; w.lo(i,j) = 0.0001; variable entrpy 'entropy' e(i) 'residuals' ; equations parmsupp(k) 'parameter support' errsupp(i) 'error support' normp(k) 'normalize p' normw(i) 'normalize w' obj 'maximize entropy' linear(i) 'linear model' ; parameter z(k,j) 'parameter support matrix' v(i,j) 'error support matrix' ; obj.. entrpy =e= -sum((k,j), p(k,j)*log(p(k,j)))-sum((i,j), w(i,j)*log(w(i,j))); linear(i).. data(i,'povrate') =e= sum(k, beta(k)*data(i,k)) + e(i); parmsupp(k).. beta(k) =e= sum(j, z(k,j)*p(k,j)); errsupp(i).. e(i) =e= sum(j, v(i,j)*w(i,j)); normp(k).. sum(j, p(k,j)) =e= 1; normw(i).. sum(j, w(i,j)) =e= 1; model gme /obj,parmsupp,errsupp,linear,normp,normw/; *------------------------------------------------------------------------------ * GME model support data *------------------------------------------------------------------------------ set s 'error support scenarios' / s3, s4 /; set gmex 'parameter support scenarios' /gme1,gme2,gme3/; table z_all(gmex,k,j) 'parameter support for GME model' j1 j2 j3 j4 j5 gme1.constant -50 -25 0 25 50 gme1.famsize -20 -10 0 10 20 gme1.unemp -20 -10 0 10 20 gme1.highschl -20 -10 0 10 20 gme1.college -20 -10 0 10 20 gme1.medinc -20 -10 0 10 20 gme1.d90 -20 -10 0 10 20 gme2.constant -50 -25 0 25 50 gme2.famsize -10 -5 0 5 10 gme2.unemp -2 -1 0 1 2 gme2.highschl -2 -1 0 1 2 gme2.college -2 -1 0 1 2 gme2.medinc -10 -5 0 5 10 gme2.d90 -20 -10 0 10 20 gme3.constant -50 -25 0 25 50 gme3.famsize -5 0 5 10 15 gme3.unemp -1 0 1 2 3 gme3.highschl -3 -2 -1 0 1 gme3.college -3 -2 -1 0 1 gme3.medinc -15 -10 -5 0 5 gme3.d90 -20 -10 0 10 20 ; table esup_all(s,j) 'error support' j1 j2 j3 j4 j5 s3 -10 -5.0 0 5.0 10 s4 -13 -6.5 0 6.5 13 ; display "GME support data",z_all,esup_all; *------------------------------------------------------------------------------ * run GME scenarios *------------------------------------------------------------------------------ gme.limrow=0; gme.limcol=0; gme.solprint=2; gme.solvelink=2; loop((s,gmex), z(k,j) = z_all(gmex,k,j); v(i,j) = esup_all(s,j); solve gme maximizing entrpy using nlp; estim(k,s,gmex)=beta.l(k); ); *------------------------------------------------------------------------------ * report results *------------------------------------------------------------------------------ option estim:3:1:2; display "---------------------------------------------------", "OLS and GME estimates (table 5 in [1])", "---------------------------------------------------", estim; *------------------------------------------------------------------------------ * create bootstrap sample *------------------------------------------------------------------------------ set nbs 'bootstrap samples' /sample1*sample400/; parameter random(nbs,i) 'random draws with replacement'; random(nbs,i) = uniformint(1,card(i)); display random; alias (i,ii); set srandom(nbs,i,ii) 'as random but now expressed as a set'; srandom(nbs,i,ii)$(ord(ii)=random(nbs,i)) = yes; display srandom; *------------------------------------------------------------------------------ * run bootstrap method *------------------------------------------------------------------------------ parameter newdata(i,k0); equation linear2(i) 'linear model'; linear2(i).. newdata(i,'povrate') =e= sum(k, beta(k)*newdata(i,k)) + e(i); model gme2 /obj,parmsupp,errsupp,linear2,normp,normw/; gme2.limrow=0; gme2.limcol=0; gme2.solprint=2; gme2.solvelink=2; parameter estim2(nbs,k,s,gmex) 'estimates'; loop(nbs, newdata(i,k0) = 0; newdata(i,k0) = sum(srandom(nbs,i,ii), data(ii,k0)); loop((s,gmex), z(k,j) = z_all(gmex,k,j); v(i,j) = esup_all(s,j); solve gme2 maximizing entrpy using nlp; estim2(nbs,k,s,gmex)=beta.l(k); ); ); display estim2; *------------------------------------------------------------------------------ * calculate bootstrap confidence intervals * percentile method *------------------------------------------------------------------------------ * call rank first not from within a loop so declarations can happen set idummy/a/; parameters xdummy(idummy) /a 1/ rdummy(idummy) ; $libinclude rank xdummy idummy rdummy parameter bconfival2(k,*,*,ival) 'confidence interval percentile method -- table 13 in [1]'; bconfival2(k,'OLS','-',ival) = ols_conf_ival(k,ival); parameter bb(nbs) 'values to be sorted' pct(ival) 'percentiles' r(nbs) 'rank' ; loop((k,s,gmex), bb(nbs) = estim2(nbs,k,s,gmex); pct('LO') = 2.5; pct('UP') = 97.5; $libinclude rank bb nbs r pct bconfival2(k,s,gmex,ival) = pct(ival); ); option bconfival2:3:1:3; display bconfival2;