# Session setup for the lab.
# NOTE(review): rm(list = ls()) deletes every object returned by ls() in the
# calling environment, and setwd() hard-codes a machine-specific Windows path.
# Both make the script non-portable; consider dropping them (or using a
# project-relative path) once it is confirmed no later chunk reads local files.
rm(list=ls())
setwd("C:/DAVE2/CIDE/Doc/Econometria II/Lab_7")
# Large scipen penalty so numbers are printed in fixed rather than scientific
# notation.
options(scipen=999)
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.0.5
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.4 v dplyr 1.0.7
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 2.0.1 v forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.0.5
## Warning: package 'tibble' was built under R version 4.0.5
## Warning: package 'tidyr' was built under R version 4.0.5
## Warning: package 'readr' was built under R version 4.0.5
## Warning: package 'dplyr' was built under R version 4.0.5
## Warning: package 'forcats' was built under R version 4.0.5
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## Warning: package 'janitor' was built under R version 4.0.5
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
## Warning: package 'modelsummary' was built under R version 4.0.5
## Loading required package: dfidx
## Warning: package 'dfidx' was built under R version 4.0.5
##
## Attaching package: 'dfidx'
## The following object is masked from 'package:stats':
##
## filter
## Warning: package 'sandwich' was built under R version 4.0.5
## Loading required package: msm
# El método de Variables Instrumentales
Recuerda:
Comandos claves en R(funciones):
Puntos/conceptos claves:
Los datos provienen de una Encuesta Nacional Longitudinal. La encuesta comenzó en 1966 con 5.525 hombres de entre 14 y 24 años y continuó su seguimiento hasta 1981. Los datos utilizados aquí provienen de 1966, la encuesta de referencia, que incluye una serie de preguntas relacionadas con los mercados laborales locales. Una de ellas es si el encuestado vive en el mismo condado que una universidad de 4 años (y de 2 años).
Card estaba interesado en estimar la siguiente expresión:
\[Y_i=\alpha+\delta S_i + \gamma X_i+\varepsilon_i\]
Donde \(Y\) es el logaritmo del salario y \(S\) los años de escolaridad (\(X\) son regresores exógenos).
Pero la habilidad (no observada) queda en el término de error y puede estar correlacionada con \(S\).
## Warning: package 'haven' was built under R version 4.0.5
library(tidyverse)
# Download a Stata file from the Mixtape GitHub repository.
#
# df: file name inside the repository (e.g. "card.dta").
# Returns whatever read_dta() returns for the resulting URL.
read_data <- function(df) {
  base_url <- "https://raw.github.com/scunning1975/mixtape/master/"
  read_dta(paste0(base_url, df))
}
card <- read_data("card.dta")

# Define variables
# (Y1 = dependent variable, Y2 = endogenous variable,
#  X1 = exogenous variables, X2 = instrument)
# The columns are taken explicitly from `card` instead of attach()-ing the
# data set: attach() leaves every column of `card` on the search path for the
# rest of the session, where names such as `educ` or `exper` can be picked up
# by accident by later code.
Y1 <- card$lwage
Y2 <- card$educ
# with() keeps the bare column symbols, so cbind() still names the matrix
# columns exper, black, south, married, smsa (coefficients print as X1exper, ...).
X1 <- with(card, cbind(exper, black, south, married, smsa))
X2 <- card$nearc4

# OLS of log wage on schooling and the exogenous controls
ols_reg <- lm(Y1 ~ Y2 + X1)
summary(ols_reg)
##
## Call:
## lm(formula = Y1 ~ Y2 + X1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.59924 -0.23035 0.01812 0.23046 1.36797
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.063317 0.063740 79.437 <0.0000000000000002 ***
## Y2 0.071173 0.003482 20.438 <0.0000000000000002 ***
## X1exper 0.034152 0.002214 15.422 <0.0000000000000002 ***
## X1black -0.166027 0.017614 -9.426 <0.0000000000000002 ***
## X1south -0.131552 0.014969 -8.788 <0.0000000000000002 ***
## X1married -0.035871 0.003401 -10.547 <0.0000000000000002 ***
## X1smsa 0.175787 0.015458 11.372 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3702 on 2996 degrees of freedom
## (7 observations deleted due to missingness)
## Multiple R-squared: 0.305, Adjusted R-squared: 0.3036
## F-statistic: 219.2 on 6 and 2996 DF, p-value: < 0.00000000000000022
##
## Call:
## ivreg(formula = Y1 ~ Y2 + X1 | X1 + X2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.81301 -0.23805 0.01766 0.24727 1.32278
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.162476 0.849590 4.899 0.000001012403 ***
## Y2 0.124164 0.049956 2.485 0.01299 *
## X1exper 0.055588 0.020286 2.740 0.00618 **
## X1black -0.115686 0.050741 -2.280 0.02268 *
## X1south -0.113165 0.023244 -4.869 0.000001182264 ***
## X1married -0.031975 0.005087 -6.286 0.000000000373 ***
## X1smsa 0.147707 0.030895 4.781 0.000001829342 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3843 on 2996 degrees of freedom
## Multiple R-Squared: 0.2513, Adjusted R-squared: 0.2498
## Wald test: 139.8 on 6 and 2996 DF, p-value: < 0.00000000000000022
##
## Call:
## lm(formula = Y2 ~ X1 + X2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.6308 -1.4454 -0.0526 1.2986 6.3449
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16.83070 0.13075 128.727 < 0.0000000000000002 ***
## X1exper -0.40443 0.00894 -45.238 < 0.0000000000000002 ***
## X1black -0.94753 0.09053 -10.467 < 0.0000000000000002 ***
## X1south -0.29735 0.07906 -3.761 0.000173 ***
## X1married -0.07269 0.01775 -4.096 0.000043142 ***
## X1smsa 0.42090 0.08487 4.959 0.000000747 ***
## X2 0.32728 0.08242 3.971 0.000073339 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.937 on 2996 degrees of freedom
## (7 observations deleted due to missingness)
## Multiple R-squared: 0.4774, Adjusted R-squared: 0.4764
## F-statistic: 456.1 on 6 and 2996 DF, p-value: < 0.00000000000000022
## Warning: package 'estimatr' was built under R version 4.0.5
## Warning: package 'lfe' was built under R version 4.0.5
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
## Attaching package: 'lfe'
## The following object is masked from 'package:lmtest':
##
## waldtest
## The following object is masked from 'package:mlogit':
##
## waldtest
## Warning: package 'SteinIV' was built under R version 4.0.3
# Download a Stata file from the Mixtape GitHub repository.
# (Same definition as the read_data() declared earlier in this file.)
#
# df: file name inside the repository (e.g. "judge_fe.dta").
# Returns whatever read_dta() returns for the resulting URL.
read_data <- function(df) {
  base_url <- "https://raw.github.com/scunning1975/mixtape/master/"
  read_dta(paste0(base_url, df))
}
judge <- read_data("judge_fe.dta")

# Each object below is a single string of column names joined by " + ",
# ready to be pasted into a model formula.

# Judge indicator columns; judge_pre_8 is left out to avoid collinearity.
judge_pre <- paste(
  setdiff(colnames(select(judge, starts_with("judge_"))), "judge_pre_8"),
  collapse = " + "
)

# Demographic controls.
demo <- paste(
  colnames(select(judge, black, age, male, white)),
  collapse = " + "
)

# Offence-type controls.
off <- paste(
  colnames(select(judge, fel, mis, sum, F1, F2, F3, M1, M2, M3, M)),
  collapse = " + "
)

# Criminal-history controls.
prior <- paste(
  colnames(select(
    judge,
    priorCases, priorWI5, prior_felChar,
    prior_guilt, onePrior, threePriors
  )),
  collapse = " + "
)

# Time controls: all but one time period (collinearity). The mutate() only
# affects the temporary copy used to list names; `judge` itself is unchanged.
control2 <- paste(
  colnames(select(
    mutate(judge, bailDate = as.numeric(bailDate)),
    day, day2, bailDate,
    t1, t2, t3, t4, t5
  )),
  collapse = " + "
)
# Formulas used in the OLS regressions of guilt on jail3:
# minimum specification = time controls only,
# maximum specification = offence dummies + demographics + priors + offence
# type + time controls.
min_formula <- as.formula(paste0("guilt ~ jail3 + ", control2))
max_formula <- as.formula(paste(
  c(
    "guilt ~ jail3 + possess + robbery + DUI1st + drugSell + aggAss",
    demo, prior, off, control2
  ),
  collapse = " + "
))

# OLS fits via lm_robust() for the minimum and maximum control sets
min_ols <- lm_robust(formula = min_formula, data = judge)
max_ols <- lm_robust(formula = max_formula, data = judge)
# --- Instrumental variables estimations
# -- 2SLS main results
# - Minimum and maximum control formulas in felm()'s multi-part syntax:
#   outcome ~ exogenous | fixed effects (none: 0) | (endogenous ~ instruments).
#   These overwrite the OLS formulas of the same name defined above.
min_formula <- as.formula(paste0(
  "guilt ~ ", control2,
  " | 0 | (jail3 ~ 0 + ", judge_pre, ")"
))
max_formula <- as.formula(paste0(
  "guilt ~ ",
  paste(
    c(demo, "possess", prior, "robbery", off, "DUI1st",
      control2, "drugSell", "aggAss"),
    collapse = " + "
  ),
  " | 0 | (jail3 ~ 0 + ", judge_pre, ")"
))

# 2SLS with the minimum control set
min_iv <- felm(formula = min_formula, data = judge)
summary(min_iv)
##
## Call:
## felm(formula = min_formula, data = judge)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.6563 -0.4798 -0.3752 0.5159 0.6248
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.41621901427 0.12925456042 -3.220 0.00128 **
## day -0.00006573049 0.00002539727 -2.588 0.00965 **
## day2 0.00000008138 0.00000006072 1.340 0.18021
## bailDate 0.00005867932 0.00000849754 6.905 0.00000000000501 ***
## t1 0.00504910528 0.01600356539 0.315 0.75238
## t2 -0.01512167760 0.01256807593 -1.203 0.22891
## t3 0.00693751049 0.01099805912 0.631 0.52818
## t4 0.01748923008 0.00723265644 2.418 0.01560 *
## t5 0.00048306154 0.00506691013 0.095 0.92405
## `jail3(fit)` 0.15102193139 0.06517252622 2.317 0.02049 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5036 on 331961 degrees of freedom
## Multiple R-squared(full model): -0.01483 Adjusted R-squared: -0.01486
## Multiple R-squared(proj model): -0.01483 Adjusted R-squared: -0.01486
## F-statistic(full model): 271 on 9 and 331961 DF, p-value: < 0.00000000000000022
## F-statistic(proj model): 271 on 9 and 331961 DF, p-value: < 0.00000000000000022
## F-statistic(endog. vars): 5.37 on 1 and 331961 DF, p-value: 0.02049
##
## Call:
## felm(formula = max_formula, data = judge)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.4475 -0.4315 -0.1008 0.4401 1.0786
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.51450590001 0.12278285184 -4.190 0.000027856863305 ***
## black 0.05904119227 0.00538958398 10.955 < 0.0000000000000002 ***
## age 0.00151199452 0.00011149721 13.561 < 0.0000000000000002 ***
## male -0.05157883981 0.00695302170 -7.418 0.000000000000119 ***
## white 0.10158565078 0.00362867489 27.995 < 0.0000000000000002 ***
## possess -0.05676290142 0.00380825255 -14.905 < 0.0000000000000002 ***
## priorCases -0.00576958909 0.00024645462 -23.410 < 0.0000000000000002 ***
## priorWI5 0.02671006781 0.00532909997 5.012 0.000000538623028 ***
## prior_felChar -0.00734279899 0.00076303854 -9.623 < 0.0000000000000002 ***
## prior_guilt 0.02368212064 0.00103371259 22.910 < 0.0000000000000002 ***
## onePrior 0.04771462151 0.00348531760 13.690 < 0.0000000000000002 ***
## threePriors -0.00037864493 0.00432975707 -0.087 0.93031
## robbery -0.09734393646 0.00788508542 -12.345 < 0.0000000000000002 ***
## fel -0.02575521316 0.00974496576 -2.643 0.00822 **
## mis 0.13383347366 0.01000540037 13.376 < 0.0000000000000002 ***
## sum 0.06570748246 0.00355838354 18.466 < 0.0000000000000002 ***
## F1 -0.00711101005 0.01032226943 -0.689 0.49089
## F2 0.03004989484 0.00967209282 3.107 0.00189 **
## F3 0.09448415468 0.00303210982 31.161 < 0.0000000000000002 ***
## M1 0.00953595454 0.00681785507 1.399 0.16191
## M2 -0.07508287320 0.00401005632 -18.724 < 0.0000000000000002 ***
## M3 0.12509802004 0.00467675602 26.749 < 0.0000000000000002 ***
## M 0.26092095123 0.00434965248 59.987 < 0.0000000000000002 ***
## DUI1st 0.05845092764 0.00622227794 9.394 < 0.0000000000000002 ***
## day -0.00006355492 0.00002410082 -2.637 0.00836 **
## day2 0.00000017065 0.00000005742 2.972 0.00296 **
## bailDate 0.00004069572 0.00000801089 5.080 0.000000377536435 ***
## t1 -0.02151296200 0.01513636065 -1.421 0.15524
## t2 -0.02686224416 0.01183655536 -2.269 0.02324 *
## t3 0.00074217176 0.01011568724 0.073 0.94151
## t4 0.01246339183 0.00661587284 1.884 0.05958 .
## t5 0.00743315550 0.00479140122 1.551 0.12082
## drugSell 0.03786549871 0.00594615289 6.368 0.000000000191674 ***
## aggAss 0.00851722429 0.00395531532 2.153 0.03129 *
## `jail3(fit)` 0.18326750791 0.06406789257 2.861 0.00423 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4745 on 331936 degrees of freedom
## Multiple R-squared(full model): 0.09922 Adjusted R-squared: 0.09913
## Multiple R-squared(proj model): 0.09922 Adjusted R-squared: 0.09913
## F-statistic(full model): 1258 on 34 and 331936 DF, p-value: < 0.00000000000000022
## F-statistic(proj model): 1258 on 34 and 331936 DF, p-value: < 0.00000000000000022
## F-statistic(endog. vars):8.183 on 1 and 331936 DF, p-value: 0.00423
#-- JIVE main results
#- minimum controls
# Outcome vector.
y <- pull(judge, guilt)

# Regressor matrix: intercept, endogenous jail3 and the time controls.
X_min <- model.matrix(
  ~ .,
  data = select(
    mutate(judge, bailDate = as.numeric(bailDate)),
    jail3, day, day2, t1, t2, t3, t4, t5, bailDate
  )
)

# Instrument matrix: intercept, judge dummies (judge_pre_8 dropped first) and
# the same time controls.
Z_min <- model.matrix(
  ~ .,
  data = select(
    select(mutate(judge, bailDate = as.numeric(bailDate)), -judge_pre_8),
    starts_with("judge_pre"), day, day2, t1, t2, t3, t4, t5, bailDate
  )
)

# JIVE estimate with minimum controls (jive.est presumably comes from the
# SteinIV package loaded earlier -- confirm).
jive.est(y = y, X = X_min, Z = Z_min)
## $est
## [,1]
## (Intercept) -0.41641155755133
## jail3 0.16162168329879
## day -0.00006598006116
## day2 0.00000008084493
## t1 0.00501339889638
## t2 -0.01501299702306
## t3 0.00738752004020
## t4 0.01784906851827
## t5 0.00086968620067
## bailDate 0.00005838456069
#- maximum controls
# Regressor matrix for the maximum-controls JIVE: intercept, endogenous jail3
# and the full control set. prior_guilt was listed twice in the original
# select(); listing it once selects the same single column.
X_max <- model.matrix(
  ~ .,
  data = select(
    mutate(judge, bailDate = as.numeric(bailDate)),
    jail3, white, age, male, black,
    possess, robbery, prior_guilt,
    onePrior, priorWI5, prior_felChar, priorCases,
    DUI1st, drugSell, aggAss, fel, mis, sum,
    threePriors,
    F1, F2, F3,
    M, M1, M2, M3,
    day, day2, bailDate,
    t1, t2, t3, t4, t5
  )
)

# Instrument matrix: judge dummies (judge_pre_8 dropped first) plus the same
# controls, in the same order.
Z_max <- model.matrix(
  ~ .,
  data = select(
    select(mutate(judge, bailDate = as.numeric(bailDate)), -judge_pre_8),
    starts_with("judge_pre"), white, age, male, black,
    possess, robbery, prior_guilt,
    onePrior, priorWI5, prior_felChar, priorCases,
    DUI1st, drugSell, aggAss, fel, mis, sum,
    threePriors,
    F1, F2, F3,
    M, M1, M2, M3,
    day, day2, bailDate,
    t1, t2, t3, t4, t5
  )
)

# JIVE estimate with maximum controls; `y` is the guilt vector built for the
# minimum-controls estimate.
jive.est(y = y, X = X_max, Z = Z_max)
## $est
## [,1]
## (Intercept) -0.5091836353645
## jail3 0.2070406470852
## white 0.1007642998757
## age 0.0015392197951
## male -0.0540142837392
## black 0.0573069056714
## possess -0.0561016431620
## robbery -0.0998043001543
## prior_guilt 0.0233485923735
## onePrior 0.0472882949917
## priorWI5 0.0250168681986
## prior_felChar -0.0075560318011
## priorCases -0.0057899714054
## DUI1st 0.0602722842840
## drugSell 0.0364823730029
## aggAss 0.0086107961336
## fel -0.0291081938674
## mis 0.1371619080530
## sum 0.0656284240280
## threePriors -0.0017099336239
## F1 -0.0107273289107
## F2 0.0266342898350
## F3 0.0941970804290
## M 0.2620939492889
## M1 0.0071651223654
## M2 -0.0762193332347
## M3 0.1238418794250
## day -0.0000646964027
## day2 0.0000001726166
## bailDate 0.0000400649704
## t1 -0.0219858146300
## t2 -0.0268037142406
## t3 0.0012013619443
## t4 0.0129390523962
## t5 0.0082769493844
# Load the mroz data set shipped with the PoEdata package.
data(list = "mroz", package = "PoEdata")
# Keep only the rows with lfp == 1 (the women who work).
mroz1 <- mroz[mroz$lfp == 1, ]
# Preview the variables used below.
head(mroz1[, c("educ", "wage", "exper", "mothereduc", "fathereduc")], n = 6L)
## educ wage exper mothereduc fathereduc
## 1 12 3.3540 14 12 7
## 2 12 1.3889 5 7 7
## 3 12 4.5455 15 12 7
## 4 12 1.0965 6 7 7
## 5 14 4.5918 7 12 14
## 6 12 4.7421 33 14 7
## [1] 428 25
Sea la ecuación \[ln(salario)=\beta_0+\beta_1educ+\beta_2cap+\varepsilon\]
donde \(\beta_2\neq0\) (es decir, la variable cap, capacidad, que es inobservable, es una variable relevante).
Si estimamos por MCO:
\[ln(salario)=\beta_0+\beta_1educ+u\] con \[ u = \beta_2cap+\varepsilon\]
entonces \(\hat{\beta_1}\) es un estimador inconsistente de \(\beta_1\).
##
## Call:
## lm(formula = log(wage) ~ educ, data = mroz1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.10256 -0.31473 0.06434 0.40081 2.10029
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.1852 0.1852 -1.000 0.318
## educ 0.1086 0.0144 7.545 0.000000000000276 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.68 on 426 degrees of freedom
## Multiple R-squared: 0.1179, Adjusted R-squared: 0.1158
## F-statistic: 56.93 on 1 and 426 DF, p-value: 0.0000000000002761
La estimación de \(\beta_1\) implica un porcentaje de casi el 11% más de salario por cada año extra de educación formal.
Posible Instrumento: educpadre. ¿Qué suponemos?
educpadre no está correlacionada con el error \(u\)
educacion y educpadre están correlacionadas
# First stage (reduced form): regress the endogenous regressor educ on the
# instrument fathereduc, then print the coefficient table.
educ.ols <- lm(formula = educ ~ fathereduc, data = mroz1)
educ.ols %>%
  tidy() %>%
  kable(
    digits = 4,
    align = "c",
    caption = "Primera etapa en el modelo MC2E(2SLS) para la ecuación del salario"
  )
term | estimate | std.error | statistic | p.value |
---|---|---|---|---|
(Intercept) | 10.2371 | 0.2759 | 37.0993 | 0 |
fathereduc | 0.2694 | 0.0286 | 9.4255 | 0 |
El estadístico \(t\) para el instrumento en esta forma reducida es \(9.4255\)
Se rechaza la hipótesis nula \(H_0:\pi_1=0\).
Por tanto, la educación (educ) está significativamente correlacionada con la educación del padre(fathereduc), es decir, tienen una relación positiva estadísticamente significativa.
Interpretación de la forma reducida:
La forma reducida descompone de forma aditiva la variable explicativa endógena en dos partes:
La parte exógena de \(X\), que es aquella explicada linealmente por los instrumentos (que son exógenos respecto al error del modelo), \(X=\pi_0 + \pi_1Z_1\)
La parte endógena de \(X\), que es lo que queda sin explicar por los instrumentos, es decir, el error de la forma reducida \(v\).
la forma reducida para la variable explicativa endógena incluye los instrumentos y todas las variables explicativas exógenas del modelo.
# Manual second stage: replace educ by its first-stage fitted values.
# The coefficient is the 2SLS estimate, but the standard errors reported by
# this lm() are not the correct 2SLS standard errors.
educHat <- fitted(educ.ols)
wage.2sls <- lm(formula = log(wage) ~ educHat, data = mroz1)
wage.2sls %>%
  tidy() %>%
  kable(
    digits = 4,
    align = "c",
    caption = "Segunda etapa en el modelo de MC2E(2SLS) para la ecuación del salario"
  )
term | estimate | std.error | statistic | p.value |
---|---|---|---|---|
(Intercept) | 0.4411 | 0.4671 | 0.9443 | 0.3455 |
educHat | 0.0592 | 0.0368 | 1.6081 | 0.1086 |
Comentarios:
El salario se incrementa en un 5.9% por año adicional de educación cuando usamos esta VI.
Esto sugiere que la estimación MCO es demasiado alta y es consistente con el sesgo por omisión de la variable capacidad.
Nótese también que los errores estándar de la estimación VI son sustancialmente mayores que los de la estimación MCO, tal y como sugiere la teoría.
En general, el estimador de VI tendrá una varianza mayor que el de MCO
Cuando realizas MC2E explícitamente (manualmente) los errores estándar no son los correctos (ivreg sí los calcula bien).
\[\hat{v}= educ - \hat{educ} = educ - (10.2371 + 0.2694*fathereduc)\]
y realizamos la regresión por MCO del modelo:
\[ln(salario)=\beta_0+\beta_1educ+\alpha*\hat{v}+\varepsilon\]
# Hausman-type regression: log(wage) on educ plus the first-stage fitted
# values. Because educ = educHat + first-stage residual, the coefficient on
# educHat is minus the coefficient the first-stage residual would get, so its
# t statistic tests exogeneity of educ (same magnitude, opposite sign).
educHat <- fitted(educ.ols)
wage.hausman <- lm(formula = log(wage) ~ educ + educHat, data = mroz1)
wage.hausman %>%
  tidy() %>%
  kable(
    digits = 4,
    align = "c",
    caption = "Test de Hausman para la ecuación del salario"
  )
term | estimate | std.error | statistic | p.value |
---|---|---|---|---|
(Intercept) | 0.4411 | 0.4393 | 1.0041 | 0.3159 |
educ | 0.1190 | 0.0158 | 7.5281 | 0.0000 |
educHat | -0.0598 | 0.0380 | -1.5717 | 0.1168 |
Contrastamos \(H_0:\alpha=0\) (educ es exógena)
Conclusión: No se rechaza la exogeneidad de educ, pues \(t=-1.5717\) (p-valor \(=0.1168\)).
\[\hat{educ}= 9.4801+0.1881*fathereduc+0.1564*mothereduc\]
# First stage with two instruments: educ on fathereduc and mothereduc.
educ.ols.multiple <- lm(formula = educ ~ fathereduc + mothereduc, data = mroz1)
educ.ols.multiple %>%
  tidy() %>%
  kable(
    digits = 4,
    align = "c",
    caption = "Primera etapa en el modelo 2SLS para la ecuación del salario"
  )
term | estimate | std.error | statistic | p.value |
---|---|---|---|---|
(Intercept) | 9.4801 | 0.3211 | 29.5227 | 0 |
fathereduc | 0.1881 | 0.0336 | 5.5925 | 0 |
mothereduc | 0.1564 | 0.0358 | 4.3651 | 0 |
# Joint significance of the two instruments in the first stage
# (H0: both coefficients are zero).
educ.ols.multiple <- lm(formula = educ ~ mothereduc + fathereduc, data = mroz1)
linearHypothesis(
  educ.ols.multiple,
  c("mothereduc=0", "fathereduc=0")
)
## Linear hypothesis test
##
## Hypothesis:
## mothereduc = 0
## fathereduc = 0
##
## Model 1: restricted model
## Model 2: educ ~ mothereduc + fathereduc
##
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 427 2230.2
## 2 425 1766.2 2 464.02 55.83 < 0.00000000000000022 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Una regla empírica: rechazar la hipótesis nula para un valor de F -estadística mayor que 10 o, para el caso de un solo instrumento, un valor t-estadística mayor que 3,16, para asegurarse de que un instrumento es fuerte.
La estimación de MC2E utilizando fathereduc y mothereduc como instrumentos es ahora:
# Manual second stage using the fitted values from the two-instrument first
# stage. As with any hand-rolled 2SLS, the printed standard errors are not the
# correct 2SLS ones.
educHat.2 <- fitted(educ.ols.multiple)
wage.2sls.multiple <- lm(formula = log(wage) ~ educHat.2, data = mroz1)
wage.2sls.multiple %>%
  tidy() %>%
  kable(
    digits = 4,
    align = "c",
    caption = "Segunda etapa en el modelo de 2SLS para la ecuación del salario"
  )
term | estimate | std.error | statistic | p.value |
---|---|---|---|---|
(Intercept) | 0.5510 | 0.4258 | 1.2941 | 0.1963 |
educHat.2 | 0.0505 | 0.0335 | 1.5061 | 0.1328 |
# Hausman-type regression with two instruments: log(wage) on educ plus the
# first-stage fitted values; the t statistic on educHat.2 tests exogeneity
# of educ.
educHat.2 <- fitted(educ.ols.multiple)
wage.2sls.m.Hausman <- lm(formula = log(wage) ~ educ + educHat.2, data = mroz1)
wage.2sls.m.Hausman %>%
  tidy() %>%
  kable(
    digits = 4,
    align = "c",
    caption = "Prueba de Hausman para la ecuación de salario"
  )
term | estimate | std.error | statistic | p.value |
---|---|---|---|---|
(Intercept) | 0.5510 | 0.3994 | 1.3795 | 0.1685 |
educ | 0.1239 | 0.0161 | 7.6885 | 0.0000 |
educHat.2 | -0.0734 | 0.0353 | -2.0782 | 0.0383 |
Continuando con el último caso, teníamos dos instrumentos (fathereduc y mothereduc) para una variable potencialmente endógena (educ), con lo que tenemos 1 restricción de sobreidentificación.
Podemos por tanto evaluar parcialmente la validez de los instrumentos (es decir, la hipótesis nula de exogeneidad) contrastando la no correlación de los instrumentos con el término de error de la ecuación de interés utilizando un contraste de Sargan.
Para ello, calculamos los residuos de la estimación MC2E: \[\tilde{u}=log(wage)-(0.5510+0.0505*educ)\]
y realizamos la regresión auxiliar de dichos residuos tanto sobre: - las variables exógenas que haya y - sobre los instrumentos utilizados
# Sargan over-identification test, computed by hand.
#
# The 2SLS residuals must be built with the OBSERVED regressor educ and the
# 2SLS coefficients: u = log(wage) - (b0 + b1 * educ).
# residuals(wage.2sls.multiple) would instead return
# log(wage) - (b0 + b1 * educHat.2), the residuals of the manual second-stage
# regression, which are not the 2SLS residuals.
uhat <- local({
  b <- coef(wage.2sls.multiple)
  log(mroz1$wage) - (b[["(Intercept)"]] + b[["educHat.2"]] * mroz1$educ)
})

# Auxiliary regression of the 2SLS residuals on all instruments (this model
# has no other exogenous regressors).
sargan <- lm(uhat ~ fathereduc + mothereduc, data = mroz1)

# Test statistic n * R^2; under H0 it is chi-squared with
# (#instruments - #endogenous regressors) = 1 degree of freedom.
nR2 <- summary(sargan)$r.squared * nrow(mroz1)
nR2
## [1] 0.3276003
## [1] 0.5670755
data("mroz", package = "PoEdata")
mroz1 <- mroz[mroz$lfp == 1, ] # restricts sample to lfp=1.

# OLS benchmark
mroz1.ols <- lm(log(wage) ~ educ + exper + I(exper^2), data = mroz1)

# Manual 2SLS. The first stage must contain the instrument AND every exogenous
# regressor of the wage equation (exper, exper^2). Reusing fitted values from
# a first stage with fathereduc only gives a second-stage coefficient that
# does not match the IV estimate from ivreg().
educHat <- fitted(
  lm(educ ~ exper + I(exper^2) + fathereduc, data = mroz1)
)
# Coefficients now match mroz1.iv; the lm() standard errors are still not the
# correct 2SLS standard errors.
wage.2sls <- lm(log(wage) ~ educHat + exper + I(exper^2), data = mroz1)

# IV via ivreg(): one instrument (fathereduc) ...
mroz1.iv <- ivreg(
  log(wage) ~ educ + exper + I(exper^2) |
    exper + I(exper^2) + fathereduc,
  data = mroz1
)
# ... and two instruments (mothereduc, fathereduc)
mroz1.iv1 <- ivreg(
  log(wage) ~ educ + exper + I(exper^2) |
    exper + I(exper^2) + mothereduc + fathereduc,
  data = mroz1
)

# Side-by-side comparison of the four fits
stargazer(mroz1.ols, wage.2sls, mroz1.iv, mroz1.iv1,
  title = "Ecuación de salario: MCO, MC2E, y modelos de VI vía ivreg()",
  header = FALSE,
  type = "text",
  keep.stat = "n",
  omit.table.layout = "n",
  star.cutoffs = NA,
  digits = 4,
  # single.row = TRUE,
  intercept.bottom = FALSE, # moves the intercept coefficient to the top
  column.labels = c(
    "MCO", "(MC2E manual)", "(IV fathereduc)",
    "(IV fathereduc y mothereduc)"
  ),
  dep.var.labels.include = FALSE,
  model.numbers = FALSE,
  dep.var.caption = "Variable dependendiente: log(wage)",
  model.names = FALSE,
  star.char = NULL # suppresses the significance stars
)
##
## Ecuación de salario: MCO, MC2E, y modelos de VI vía ivreg()
## ================================================================================
## Variable dependendiente: log(wage)
## -------------------------------------------------------------------
## MCO (MC2E manual) (IV fathereduc) (IV fathereduc y mothereduc)
## --------------------------------------------------------------------------------
## Constant -0.5220 -0.0885 -0.0611 0.0481
## (0.1986) (0.4733) (0.4364) (0.4003)
##
## educ 0.1075 0.0702 0.0614
## (0.0141) (0.0344) (0.0314)
##
## educHat 0.0705
## (0.0364)
##
## exper 0.0416 0.0470 0.0437 0.0442
## (0.0132) (0.0140) (0.0134) (0.0134)
##
## I(exper2) -0.0008 -0.0010 -0.0009 -0.0009
## (0.0004) (0.0004) (0.0004) (0.0004)
##
## --------------------------------------------------------------------------------
## Observations 428 428 428 428
## ================================================================================
Comentario:
`
Sea el modelo
# IV estimate of the wage equation: educ is instrumented with mothereduc and
# fathereduc; exper and exper^2 act as their own instruments.
mroz1.iv1 <- ivreg(
  formula = log(wage) ~ educ + exper + I(exper^2) |
    exper + I(exper^2) + mothereduc + fathereduc,
  data = mroz1
)
##
## Call:
## ivreg(formula = log(wage) ~ educ + exper + I(exper^2) | exper +
## I(exper^2) + mothereduc + fathereduc, data = mroz1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.0986 -0.3196 0.0551 0.3689 2.3493
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0481003 0.4003281 0.120 0.90442
## educ 0.0613966 0.0314367 1.953 0.05147 .
## exper 0.0441704 0.0134325 3.288 0.00109 **
## I(exper^2) -0.0008990 0.0004017 -2.238 0.02574 *
##
## Diagnostic tests:
## df1 df2 statistic p-value
## Weak instruments 2 423 55.400 <0.0000000000000002 ***
## Wu-Hausman 1 423 2.793 0.0954 .
## Sargan 1 NA 0.378 0.5386
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6747 on 424 degrees of freedom
## Multiple R-Squared: 0.1357, Adjusted R-squared: 0.1296
## Wald test: 8.141 on 3 and 424 DF, p-value: 0.00002787
Dado que usar VI cuando no es necesario empeora nuestras estimaciones, nos gustaría probar si las variables que nos preocupan son realmente endógenas.
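Conclusión: Con el contraste de Wu-Hausman (p-valor = 0.0954) se rechaza al 10% de significancia, pero no al 5%, la hipótesis nula de que la variable de interés no está correlacionada con el término de error; hay, por tanto, evidencia débil de que educ es endógena.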
Para que se identifique un modelo, el número de instrumentos debe ser al menos igual al número de variables endógenas. Si hay más instrumentos que variables endógenas, se dice que el modelo está sobreidentificado
Conclusión: No se rechaza la hipótesis nula (Sargan, p-valor = 0.5386), por lo que no hay evidencia en contra de la validez de los instrumentos adicionales (es decir, de que no están correlacionados con el término de error).