rm(list=ls())
#setwd("C:/DAVE2/CIDE/Doc/Econometria II/Lab_8B")
options(scipen=999) 

library(AER)  #Econometría Aplicada con R
## Loading required package: car
## Loading required package: carData
## Loading required package: lmtest
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: sandwich
## Loading required package: survival
library(plm)  #Ajuste de modelos con datos panel
library(stargazer) # Para salidas de las regresiones
## 
## Please cite as:
##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(tseries) # for `adf.test()`
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(dynlm) #for function `dynlm()`
library(vars) # for function `VAR()`
## Loading required package: MASS
## Loading required package: strucchange
## Loading required package: urca
library(nlWaldTest) # for the `nlWaldtest()` function
library(lmtest) #for `coeftest()` and `bptest()`.
library(broom) #for `glance(`) and `tidy()`
library(car) #for `hccm()` robust standard errors
library(sandwich)
library(knitr) #for `kable()`
library(forecast) 
library(systemfit)
## Loading required package: Matrix
## 
## Please cite the 'systemfit' package as:
## Arne Henningsen and Jeff D. Hamann (2007). systemfit: A Package for Estimating Systems of Simultaneous Equations in R. Journal of Statistical Software 23(4), 1-40. http://www.jstatsoft.org/v23/i04/.
## 
## If you have questions, suggestions, or comments regarding the 'systemfit' package, please use a forum or 'tracker' at systemfit's R-Forge site:
## https://r-forge.r-project.org/projects/systemfit/
library(AER)
library(xtable)
library(readxl)
library(ggplot2)

Errores Robustos

Para calcular errores estandar robustos necesitamos una paqueteria llamada sandwich y ua llamada lmtest.

library(lmtest)
library(sandwich)

Ejemplo 1

Queremos investigar la relacion entre ahorro e ingresos. Podemos hacerlo con los datos del Wooldridge.

library(wooldridge)
## 
## Attaching package: 'wooldridge'
## The following object is masked from 'package:MASS':
## 
##     cement
data("saving")

Solo vamos a utilizar los valores positivos para ahorro (saving) que son menores que el ingreso

¿Sobre quienes seria la inferencia?

summary(saving)
##       sav               inc             size            educ      
##  Min.   :-5577.0   Min.   :  750   Min.   : 2.00   Min.   : 2.00  
##  1st Qu.:  194.5   1st Qu.: 6510   1st Qu.: 3.00   1st Qu.: 9.00  
##  Median :  982.0   Median : 8776   Median : 4.00   Median :12.00  
##  Mean   : 1582.5   Mean   : 9941   Mean   : 4.35   Mean   :11.58  
##  3rd Qu.: 1834.8   3rd Qu.:11903   3rd Qu.: 5.00   3rd Qu.:13.00  
##  Max.   :25405.0   Max.   :32080   Max.   :10.00   Max.   :20.00  
##       age            black           cons       
##  Min.   :26.00   Min.   :0.00   Min.   :-13055  
##  1st Qu.:33.00   1st Qu.:0.00   1st Qu.:  5732  
##  Median :38.50   Median :0.00   Median :  7562  
##  Mean   :38.77   Mean   :0.07   Mean   :  8359  
##  3rd Qu.:44.00   3rd Qu.:0.00   3rd Qu.:  9864  
##  Max.   :54.00   Max.   :1.00   Max.   : 30280

Filtramos nuestra base:

library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v tibble  3.1.5     v dplyr   1.0.7
## v tidyr   1.1.4     v stringr 1.4.0
## v readr   2.0.2     v forcats 0.5.1
## v purrr   0.3.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::between()    masks plm::between()
## x stringr::boundary() masks strucchange::boundary()
## x tidyr::expand()     masks Matrix::expand()
## x dplyr::filter()     masks stats::filter()
## x dplyr::lag()        masks plm::lag(), stats::lag()
## x dplyr::lead()       masks plm::lead()
## x tidyr::pack()       masks Matrix::pack()
## x dplyr::recode()     masks car::recode()
## x dplyr::select()     masks MASS::select()
## x purrr::some()       masks car::some()
## x tidyr::unpack()     masks Matrix::unpack()
saving <- saving %>% filter(sav > 0, inc < 20000,
         sav < inc)

Obtenemos de nuevo estadisticas descriptivas

summary(saving)
##       sav              inc             size             educ      
##  Min.   :   6.0   Min.   : 1920   Min.   : 2.000   Min.   : 2.00  
##  1st Qu.: 631.5   1st Qu.: 6600   1st Qu.: 3.000   1st Qu.: 9.00  
##  Median :1105.0   Median : 8703   Median : 4.000   Median :12.00  
##  Mean   :1616.2   Mean   : 9252   Mean   : 4.333   Mean   :11.49  
##  3rd Qu.:2033.5   3rd Qu.:11100   3rd Qu.: 5.000   3rd Qu.:12.50  
##  Max.   :6120.0   Max.   :19362   Max.   :10.000   Max.   :20.00  
##       age            black              cons      
##  Min.   :27.00   Min.   :0.00000   Min.   : 1366  
##  1st Qu.:33.00   1st Qu.:0.00000   1st Qu.: 5664  
##  Median :38.00   Median :0.00000   Median : 7210  
##  Mean   :38.21   Mean   :0.06667   Mean   : 7636  
##  3rd Qu.:44.00   3rd Qu.:0.00000   3rd Qu.: 9438  
##  Max.   :54.00   Max.   :1.00000   Max.   :16772

Pltemaos la relacion

ggplot(saving, aes(x = inc, y = sav)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  labs(x = "Annual income", y = "Annual savings")
## `geom_smooth()` using formula 'y ~ x'

La linea de regresion en la grafica muestra una clara relacion positiva entre el ahorro y el ingreso.

¿ Que podemos notar?

Noten que conforme el ingreso aumenta, las diferencias entre las observaciones y la linea de regresion aumentan.

¿Esto que significa? ¿Por que me importa?

Esto significa que hay mayor incertidumbre sobre la relacion estimada entre las dos variables para niveles altos de ingreso.

Esto nos importa porque indica que hay HETEROCEDASTICIDAD

Estimemos el modelo de forma clasica

ahorro <- lm(sav ~ inc, data = saving)
summary(ahorro)
## 
## Call:
## lm(formula = sav ~ inc, data = saving)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2667.8  -874.5  -302.7   431.1  4606.6 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)   
## (Intercept) 316.19835  462.06882   0.684  0.49595   
## inc           0.14052    0.04672   3.007  0.00361 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1413 on 73 degrees of freedom
## Multiple R-squared:  0.1102, Adjusted R-squared:  0.09805 
## F-statistic: 9.044 on 1 and 73 DF,  p-value: 0.003613

Como sabemos que hay heterocedasticidad, los valores t reportados no nos dan la informacion que necesitamos

Vamos a calcular los estadisticos t teniendo en cuenta los errores estandar robustos

Coeftest esta en el paquete lmtest , se puede usar con la funcion vcovHC que esta en la paqueteria sandwich

Argumentos de la funcion coeftest 1. Resultado de la regresion que obtuvimos originalmente (ahorro) 2. vcov matriz de varianza-covarianza la funcion vcovHC produce la matriz que permite incorporar disintos tipos de heterocedasticidad

En este caso vamos a utilizar el error estandar de White que esta indicado por type = “HC0”

coeftest(ahorro, vcov = vcovHC(ahorro, type = "HC0"))
## 
## t test of coefficients:
## 
##               Estimate Std. Error t value Pr(>|t|)   
## (Intercept) 316.198354 414.728032  0.7624 0.448264   
## inc           0.140515   0.048805  2.8791 0.005229 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Prueba F para la significancia conjunta

Vamos a estimar el modelo no restringido O sea el modelo con todas las variables

ahorro_unres <- lm(sav ~ inc + size + educ + age, data = saving)

F test

Para hacer la prueba de significancia conjunta utilizamos la funcion ANOVA

anova(ahorro, ahorro_unres)
## Analysis of Variance Table
## 
## Model 1: sav ~ inc
## Model 2: sav ~ inc + size + educ + age
##   Res.Df       RSS Df Sum of Sq      F Pr(>F)
## 1     73 145846877                           
## 2     70 144286605  3   1560272 0.2523 0.8594

Asi como esta no toma en cuenta el problema de heterocedasticidad

De nuevo tenemos que especificar la matriz de varianza

waldtest(ahorro, ahorro_unres, vcov = vcovHC(ahorro_unres, type = "HC0"))
## Wald test
## 
## Model 1: sav ~ inc
## Model 2: sav ~ inc + size + educ + age
##   Res.Df Df      F Pr(>F)
## 1     73                 
## 2     70  3 0.3625 0.7803

¿Que concluimos?

Ejemplo II

Cargamos los datos

library(foreign)
fpe <- read.dta("https://data.princeton.edu/wws509/datasets/effort.dta")

Creamos una variable categorica:

fpe$effortg = cut(fpe$effort,  breaks=c(min(fpe$effort),5,15,max(fpe$effort)), 
                  right=FALSE, include.lowest=TRUE, labels=c("Weak","Moderate","Strong"))

Calculamos nuestra regresion:

m <- lm(change ~ setting + effortg, data = fpe)
summary(m)
## 
## Call:
## lm(formula = change ~ setting + effortg, data = fpe)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.0386  -2.8198   0.1036   1.3269  11.4416 
## 
## Coefficients:
##                 Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)      -5.9540     7.1660  -0.831     0.418    
## setting           0.1693     0.1056   1.604     0.128    
## effortgModerate   4.1439     3.1912   1.299     0.213    
## effortgStrong    19.4476     3.7293   5.215 0.0000851 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.732 on 16 degrees of freedom
## Multiple R-squared:  0.8016, Adjusted R-squared:  0.7644 
## F-statistic: 21.55 on 3 and 16 DF,  p-value: 0.000007262

Calculamos nuestro errores robustos:

coeftest(m, vcov = vcovHC(m, type="HC1"))
## 
## t test of coefficients:
## 
##                  Estimate Std. Error t value    Pr(>|t|)    
## (Intercept)     -5.954036   2.707697 -2.1989     0.04294 *  
## setting          0.169268   0.045432  3.7258     0.00184 ** 
## effortgModerate  4.143915   3.191220  1.2985     0.21251    
## effortgStrong   19.447609   2.567472  7.5746 0.000001118 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

¿Como se comprara los dos modelos?

m2<-coeftest(m, vcov = vcovHC(m, type="HC1"))
stargazer::stargazer(m,m2, type="text")
## 
## ======================================================
##                            Dependent variable:        
##                     ----------------------------------
##                             change                    
##                              OLS           coefficient
##                                               test    
##                              (1)               (2)    
## ------------------------------------------------------
## setting                     0.169           0.169***  
##                            (0.106)           (0.045)  
##                                                       
## effortgModerate             4.144             4.144   
##                            (3.191)           (3.191)  
##                                                       
## effortgStrong             19.448***         19.448*** 
##                            (3.729)           (2.567)  
##                                                       
## Constant                    -5.954          -5.954**  
##                            (7.166)           (2.708)  
##                                                       
## ------------------------------------------------------
## Observations                  20                      
## R2                          0.802                     
## Adjusted R2                 0.764                     
## Residual Std. Error    5.732 (df = 16)                
## F Statistic         21.554*** (df = 3; 16)            
## ======================================================
## Note:                      *p<0.1; **p<0.05; ***p<0.01

Algunos errores estandar robustos son menores el efecto de setting ahora es significativo

Errores Estandar Agrupados

Una posibilidad para hacer errores agrupado es especificando en el summary la variable de interes:

summary(lm.object, cluster=c("variable"))

Vamos a usar los errores estandar agrupados de los ejemplos anteriores

Ejemplo 1

summary(ahorro_unres, cluster=c("black"))
## 
## Call:
## lm(formula = sav ~ inc + size + educ + age, data = saving)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2509.4  -958.5  -339.7   263.8  4602.3 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -389.63835 1474.67951  -0.264   0.7924  
## inc            0.12627    0.05534   2.282   0.0256 *
## size          65.66259  120.51371   0.545   0.5876  
## educ          38.29397   57.24433   0.669   0.5057  
## age            2.95580   26.96923   0.110   0.9130  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1436 on 70 degrees of freedom
## Multiple R-squared:  0.1198, Adjusted R-squared:  0.06946 
## F-statistic: 2.381 on 4 and 70 DF,  p-value: 0.05976

Y luego

summary(ahorro_unres, cluster=c("educ"))
## 
## Call:
## lm(formula = sav ~ inc + size + educ + age, data = saving)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2509.4  -958.5  -339.7   263.8  4602.3 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -389.63835 1474.67951  -0.264   0.7924  
## inc            0.12627    0.05534   2.282   0.0256 *
## size          65.66259  120.51371   0.545   0.5876  
## educ          38.29397   57.24433   0.669   0.5057  
## age            2.95580   26.96923   0.110   0.9130  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1436 on 70 degrees of freedom
## Multiple R-squared:  0.1198, Adjusted R-squared:  0.06946 
## F-statistic: 2.381 on 4 and 70 DF,  p-value: 0.05976

¿Como lo podriamos justificar?

Ejemplo 2

summary(m, cluster=c("country"))
## 
## Call:
## lm(formula = change ~ setting + effortg, data = fpe)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.0386  -2.8198   0.1036   1.3269  11.4416 
## 
## Coefficients:
##                 Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)      -5.9540     7.1660  -0.831     0.418    
## setting           0.1693     0.1056   1.604     0.128    
## effortgModerate   4.1439     3.1912   1.299     0.213    
## effortgStrong    19.4476     3.7293   5.215 0.0000851 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.732 on 16 degrees of freedom
## Multiple R-squared:  0.8016, Adjusted R-squared:  0.7644 
## F-statistic: 21.55 on 3 and 16 DF,  p-value: 0.000007262

Ejemplo 3

library(webuse)
library(dplyr)
nlswork_orig <- webuse('nlswork')
nlswork <- filter(nlswork_orig, idcode <= 100) %>%
  select(idcode, year, ln_wage, age, tenure, union) %>%
  filter(complete.cases(.)) %>%
  mutate(union = as.integer(union),
         idcode = as.factor(idcode))
str(nlswork)
## tibble [386 x 6] (S3: tbl_df/tbl/data.frame)
##  $ idcode : Factor w/ 82 levels "1","2","3","4",..: 1 1 1 1 1 1 1 2 2 2 ...
##  $ year   : num [1:386] 72 77 80 83 85 87 88 71 77 78 ...
##   ..- attr(*, "label")= chr "interview year"
##   ..- attr(*, "format.stata")= chr "%8.0g"
##  $ ln_wage: num [1:386] 1.59 1.78 2.55 2.42 2.61 ...
##   ..- attr(*, "label")= chr "ln(wage/GNP deflator)"
##   ..- attr(*, "format.stata")= chr "%9.0g"
##  $ age    : num [1:386] 20 25 28 31 33 35 37 19 25 26 ...
##   ..- attr(*, "label")= chr "age in current year"
##   ..- attr(*, "format.stata")= chr "%8.0g"
##  $ tenure : num [1:386] 0.917 1.5 1.833 0.667 1.917 ...
##   ..- attr(*, "label")= chr "job tenure, in years"
##   ..- attr(*, "format.stata")= chr "%9.0g"
##  $ union  : int [1:386] 1 0 1 1 1 1 1 0 1 1 ...
##  - attr(*, "label")= chr "National Longitudinal Survey.  Young Women 14-26 years of age in 1968"

Analizamos la base:

summary(nlswork)
##      idcode         year          ln_wage            age           tenure      
##  9      : 12   Min.   :70.00   Min.   :0.4733   Min.   :18.0   Min.   : 0.000  
##  20     : 12   1st Qu.:73.00   1st Qu.:1.6131   1st Qu.:25.0   1st Qu.: 1.167  
##  6      : 11   Median :80.00   Median :1.9559   Median :31.0   Median : 2.417  
##  16     : 11   Mean   :79.61   Mean   :1.9453   Mean   :30.8   Mean   : 3.636  
##  22     : 11   3rd Qu.:85.00   3rd Qu.:2.2349   3rd Qu.:36.0   3rd Qu.: 4.958  
##  24     : 11   Max.   :88.00   Max.   :3.5791   Max.   :45.0   Max.   :19.000  
##  (Other):318                                                                   
##      union       
##  Min.   :0.0000  
##  1st Qu.:0.0000  
##  Median :0.0000  
##  Mean   :0.2591  
##  3rd Qu.:1.0000  
##  Max.   :1.0000  
## 

Analizamos como se distribuyen los idnividuos:

summary(as.integer(table(nlswork$idcode)))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   2.000   4.000   4.707   7.000  12.000

Concentramonos en la distribucion:

y_mean_sd_cl <- sapply(levels(nlswork$idcode), function(idcode) {
  y_cl <- nlswork$ln_wage[nlswork$idcode == idcode]
  c(mean(y_cl), sd(y_cl))
})
hist(y_mean_sd_cl[1,], breaks = 20,
     main = 'Histogram of DV means per subject', xlab = NA)

Comparamos la sd de los individuos especificos ocn la promedia:

c(sd(y_mean_sd_cl[1,]), mean(y_mean_sd_cl[2,], na.rm = TRUE))
## [1] 0.4038449 0.2221142

Vamos a calcular un modelo con efectos fijos pero sin ajustar el error:

m1 <- lm(ln_wage ~ age + tenure + union + tenure:union + idcode,
         data = nlswork)
summary(m1)
## 
## Call:
## lm(formula = ln_wage ~ age + tenure + union + tenure:union + 
##     idcode, data = nlswork)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.96463 -0.09405  0.00000  0.11460  1.23525 
## 
## Coefficients:
##                 Estimate  Std. Error t value             Pr(>|t|)    
## (Intercept)   1.88247823  0.13141150  14.325 < 0.0000000000000002 ***
## age           0.00563081  0.00310980   1.811             0.071193 .  
## tenure        0.02075643  0.00696442   2.980             0.003115 ** 
## union         0.17461939  0.06064604   2.879             0.004272 ** 
## idcode2      -0.61738650  0.12853290  -4.803      0.0000024682119 ***
## idcode3      -0.56251455  0.13285002  -4.234      0.0000305327004 ***
## idcode4      -0.30061187  0.12907391  -2.329             0.020524 *  
## idcode5      -0.19265223  0.14219753  -1.355             0.176494    
## idcode6      -0.44858870  0.13180781  -3.403             0.000756 ***
## idcode7      -0.80315152  0.15428882  -5.206      0.0000003596300 ***
## idcode9      -0.47790720  0.13484218  -3.544             0.000457 ***
## idcode10     -0.74218183  0.15478622  -4.795      0.0000025668272 ***
## idcode12      0.53789363  0.21755050   2.473             0.013971 *  
## idcode13      0.20808581  0.13915025   1.495             0.135860    
## idcode14     -0.04163192  0.21128188  -0.197             0.843926    
## idcode15      0.14709668  0.13575405   1.084             0.279433    
## idcode16     -0.00201464  0.13162766  -0.015             0.987799    
## idcode17      0.09351768  0.14733897   0.635             0.526101    
## idcode18     -0.66740312  0.16572937  -4.027      0.0000716062148 ***
## idcode19     -0.12664554  0.13087668  -0.968             0.333988    
## idcode20     -0.08582542  0.12310650  -0.697             0.486240    
## idcode21     -0.34260332  0.18031897  -1.900             0.058394 .  
## idcode22      0.02695311  0.13003991   0.207             0.835941    
## idcode23     -0.28058507  0.15626679  -1.796             0.073572 .  
## idcode24     -0.01601267  0.12961023  -0.124             0.901758    
## idcode25      0.01423082  0.13600122   0.105             0.916733    
## idcode26     -0.32256359  0.16498933  -1.955             0.051505 .  
## idcode27      0.06602825  0.20743209   0.318             0.750469    
## idcode29     -0.14670491  0.20766914  -0.706             0.480465    
## idcode30     -0.58541724  0.16153875  -3.624             0.000341 ***
## idcode35     -0.93858587  0.27382444  -3.428             0.000694 ***
## idcode36     -0.10247830  0.17507040  -0.585             0.558749    
## idcode38     -0.62263928  0.17975262  -3.464             0.000610 ***
## idcode39      0.08848647  0.15872224   0.557             0.577607    
## idcode40     -0.25827219  0.27431351  -0.942             0.347195    
## idcode41     -0.44506222  0.16450713  -2.705             0.007212 ** 
## idcode43     -0.48361718  0.20791373  -2.326             0.020682 *  
## idcode44      0.62108242  0.13613900   4.562      0.0000073905322 ***
## idcode45     -0.38833649  0.13832909  -2.807             0.005322 ** 
## idcode46     -0.92357018  0.21336379  -4.329      0.0000204709575 ***
## idcode47     -0.61227673  0.20952831  -2.922             0.003740 ** 
## idcode48     -0.64692982  0.27539126  -2.349             0.019466 *  
## idcode49     -0.42736527  0.16451750  -2.598             0.009848 ** 
## idcode50     -0.14225891  0.20757404  -0.685             0.493658    
## idcode51     -0.50667946  0.15468208  -3.276             0.001178 ** 
## idcode53     -0.59031362  0.20756685  -2.844             0.004762 ** 
## idcode54     -0.50741949  0.27742394  -1.829             0.068386 .  
## idcode55     -0.50757323  0.15430814  -3.289             0.001124 ** 
## idcode56      0.45211477  0.20774037   2.176             0.030309 *  
## idcode57     -0.65974072  0.13931329  -4.736      0.0000033725231 ***
## idcode58     -0.37191622  0.17936873  -2.073             0.038981 *  
## idcode59     -0.77299928  0.20917022  -3.696             0.000261 ***
## idcode60     -0.47289347  0.18074948  -2.616             0.009339 ** 
## idcode61     -0.67260453  0.18066274  -3.723             0.000235 ***
## idcode62     -0.64158254  0.16556515  -3.875             0.000131 ***
## idcode63      0.44917325  0.14172277   3.169             0.001685 ** 
## idcode64     -0.81456251  0.17471361  -4.662      0.0000047126831 ***
## idcode65      0.02603803  0.20364083   0.128             0.898343    
## idcode66     -0.94312615  0.13597280  -6.936      0.0000000000248 ***
## idcode67      0.05386553  0.15492242   0.348             0.728314    
## idcode68     -0.11396387  0.27351249  -0.417             0.677219    
## idcode69     -0.41926248  0.18014817  -2.327             0.020613 *  
## idcode70      0.36914062  0.22196806   1.663             0.097350 .  
## idcode71     -0.59954614  0.14204279  -4.221      0.0000322865767 ***
## idcode72     -0.31390636  0.15968793  -1.966             0.050250 .  
## idcode73     -0.29950379  0.13530317  -2.214             0.027610 *  
## idcode75     -0.26317345  0.13260663  -1.985             0.048098 *  
## idcode76     -0.35951817  0.27391131  -1.313             0.190343    
## idcode77     -0.64285851  0.17978854  -3.576             0.000407 ***
## idcode78      0.20608055  0.13373338   1.541             0.124376    
## idcode80      0.14951759  0.18891872   0.791             0.429313    
## idcode81     -0.89637552  0.27083192  -3.310             0.001048 ** 
## idcode82      0.00008082  0.16221523   0.000             0.999603    
## idcode83     -0.41009241  0.14211346  -2.886             0.004189 ** 
## idcode85      0.06178784  0.21024749   0.294             0.769052    
## idcode86      0.15498156  0.13766550   1.126             0.261157    
## idcode87      0.24987344  0.27369258   0.913             0.361991    
## idcode89      0.31465302  0.20781542   1.514             0.131054    
## idcode92     -0.27823987  0.27059948  -1.028             0.304668    
## idcode93     -0.13122980  0.27387215  -0.479             0.632171    
## idcode94     -0.82293380  0.16339833  -5.036      0.0000008204384 ***
## idcode95     -0.56759866  0.27400393  -2.071             0.039167 *  
## idcode97     -0.63020474  0.27581166  -2.285             0.023016 *  
## idcode98     -0.31100644  0.15961161  -1.949             0.052285 .  
## idcode99     -0.16428103  0.18080683  -0.909             0.364291    
## tenure:union  0.01497411  0.00954851   1.568             0.117885    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2525 on 300 degrees of freedom
## Multiple R-squared:  0.7554, Adjusted R-squared:  0.6861 
## F-statistic:  10.9 on 85 and 300 DF,  p-value: < 0.00000000000000022

Analizamos los resultado:

m1coeffs_std <- data.frame(summary(m1)$coefficients)
coi_indices <- which(!startsWith(row.names(m1coeffs_std), 'idcode'))
m1coeffs_std[coi_indices,]
##                 Estimate  Std..Error   t.value
## (Intercept)  1.882478232 0.131411504 14.325064
## age          0.005630809 0.003109803  1.810664
## tenure       0.020756426 0.006964417  2.980353
## union        0.174619394 0.060646038  2.879321
## tenure:union 0.014974113 0.009548509  1.568215
##                                                  Pr...t..
## (Intercept)  0.000000000000000000000000000000000008022367
## age          0.071193152679204424471137713226198684424162
## tenure       0.003114741807900340241077596914465175359510
## union        0.004272026942067721523577095155133065418340
## tenure:union 0.117885143124772509559683442148525500670075

Como era de esperar, la permanencia en el empleo y especialmente la afiliación a un sindicato se asocian positivamente con el salario. El coeficiente del término de interacción muestra que con la afiliación a un sindicato el efecto de permanencia en el trabajo es incluso un poco más alto, aunque no significativamente.

Como corregimos por los errores agrupados:

Lmtest

library(sandwich)
library(lmtest)
m1coeffs_cl <- coeftest(m1, vcov = vcovCL, cluster = ~idcode)
m1coeffs_cl[coi_indices,]
##                 Estimate  Std. Error    t value
## (Intercept)  1.882478232 0.157611390 11.9437956
## age          0.005630809 0.006339777  0.8881715
## tenure       0.020756426 0.011149190  1.8616981
## union        0.174619394 0.101970509  1.7124500
## tenure:union 0.014974113 0.009646023  1.5523613
##                                        Pr(>|t|)
## (Intercept)  0.00000000000000000000000000366797
## age          0.37516007328543043986002203382668
## tenure       0.06362342214400516782202288368353
## union        0.08784707821388915149896092771087
## tenure:union 0.12163006343100070394402933970923

Este paquete nos da varias opciones como la para calcular los intervalos de confianzas:

(m1cis <- coefci(m1, parm = coi_indices, vcov = vcovCL,
                 cluster = ~idcode))
##                     2.5 %     97.5 %
## (Intercept)   1.572314302 2.19264216
## age          -0.006845258 0.01810688
## tenure       -0.001184099 0.04269695
## union        -0.026048678 0.37528746
## tenure:union -0.004008324 0.03395655

Esto es realmente importante, ya que de lo contrario se aplica de forma predeterminada la estimación de covarianza clásica (no agrupada). Esto, debido a los SE más bajos, conduce a IC más estrechos:

coefci(m1, parm = coi_indices)
##                      2.5 %     97.5 %
## (Intercept)   1.6238731375 2.14108333
## age          -0.0004889822 0.01175060
## tenure        0.0070511278 0.03446172
## union         0.0552738733 0.29396491
## tenure:union -0.0038164264 0.03376465

Tenure e union ya no contienen el 0.

Es mas eficeinte calcular indipendentemiente la matriz de varianza y covarianza.

cl_vcov_mat <- vcovCL(m1, cluster = ~idcode)

para luego pasaral como argumento:

m1coeffs_cl2 <- coeftest(m1, vcov = cl_vcov_mat)
all(near(m1coeffs_cl[,2], m1coeffs_cl2[,2]))
## [1] TRUE
m1coeffs_cl2
## 
## t test of coefficients:
## 
##                  Estimate   Std. Error  t value              Pr(>|t|)    
## (Intercept)   1.882478232  0.157611390  11.9438 < 0.00000000000000022 ***
## age           0.005630809  0.006339777   0.8882             0.3751601    
## tenure        0.020756426  0.011149190   1.8617             0.0636234 .  
## union         0.174619394  0.101970509   1.7124             0.0878471 .  
## idcode2      -0.617386497  0.020561516 -30.0263 < 0.00000000000000022 ***
## idcode3      -0.562514546  0.085371134  -6.5890 0.0000000001985501287 ***
## idcode4      -0.300611867  0.045217648  -6.6481 0.0000000001401240754 ***
## idcode5      -0.192652226  0.080696916  -2.3874             0.0175891 *  
## idcode6      -0.448588703  0.076998894  -5.8259 0.0000000146308955759 ***
## idcode7      -0.803151524  0.077586665 -10.3517 < 0.00000000000000022 ***
## idcode9      -0.477907202  0.077282396  -6.1839 0.0000000020419276543 ***
## idcode10     -0.742181831  0.075596923  -9.8176 < 0.00000000000000022 ***
## idcode12      0.537893629  0.121048674   4.4436 0.0000124605920197447 ***
## idcode13      0.208085814  0.092384493   2.2524             0.0250204 *  
## idcode14     -0.041631921  0.125721970  -0.3311             0.7407678    
## idcode15      0.147096675  0.020541338   7.1610 0.0000000000062086437 ***
## idcode16     -0.002014638  0.082613716  -0.0244             0.9805607    
## idcode17      0.093517679  0.074920103   1.2482             0.2129194    
## idcode18     -0.667403115  0.093227938  -7.1588 0.0000000000062933735 ***
## idcode19     -0.126645535  0.074677689  -1.6959             0.0909432 .  
## idcode20     -0.085825422  0.049171754  -1.7454             0.0819354 .  
## idcode21     -0.342603325  0.077785362  -4.4045 0.0000147717344307811 ***
## idcode22      0.026953107  0.081174846   0.3320             0.7400927    
## idcode23     -0.280585071  0.099365805  -2.8238             0.0050641 ** 
## idcode24     -0.016012667  0.084325730  -0.1899             0.8495232    
## idcode25      0.014230819  0.091684772   0.1552             0.8767565    
## idcode26     -0.322563590  0.088854385  -3.6302             0.0003328 ***
## idcode27      0.066028252  0.074750627   0.8833             0.3777741    
## idcode29     -0.146704912  0.074887273  -1.9590             0.0510377 .  
## idcode30     -0.585417239  0.057001836 -10.2701 < 0.00000000000000022 ***
## idcode35     -0.938585872  0.083696844 -11.2141 < 0.00000000000000022 ***
## idcode36     -0.102478303  0.028295522  -3.6217             0.0003435 ***
## idcode38     -0.622639283  0.078059836  -7.9764 0.0000000000000320177 ***
## idcode39      0.088486469  0.117163740   0.7552             0.4506989    
## idcode40     -0.258272186  0.077119289  -3.3490             0.0009146 ***
## idcode41     -0.445062221  0.074505142  -5.9736 0.0000000065623943907 ***
## idcode43     -0.483617177  0.081599952  -5.9267 0.0000000084788838298 ***
## idcode44      0.621082420  0.034820934  17.8365 < 0.00000000000000022 ***
## idcode45     -0.388336488  0.074270196  -5.2287 0.0000003206490130406 ***
## idcode46     -0.923570183  0.142324438  -6.4892 0.0000000003561300452 ***
## idcode47     -0.612276731  0.107590090  -5.6908 0.0000000300678356522 ***
## idcode48     -0.646929824  0.108003964  -5.9899 0.0000000060013011373 ***
## idcode49     -0.427365275  0.081746977  -5.2279 0.0000003219158113920 ***
## idcode50     -0.142258913  0.086063210  -1.6530             0.0993854 .  
## idcode51     -0.506679459  0.080025891  -6.3314 0.0000000008846860519 ***
## idcode53     -0.590313624  0.075014679  -7.8693 0.0000000000000652981 ***
## idcode54     -0.507419486  0.133513671  -3.8005             0.0001749 ***
## idcode55     -0.507573234  0.078234045  -6.4879 0.0000000003588493396 ***
## idcode56      0.452114772  0.086344376   5.2362 0.0000003089718817572 ***
## idcode57     -0.659740720  0.093034575  -7.0913 0.0000000000095685570 ***
## idcode58     -0.371916224  0.074128131  -5.0172 0.0000008995766272716 ***
## idcode59     -0.772999280  0.105322545  -7.3394 0.0000000000020251288 ***
## idcode60     -0.472893472  0.076772581  -6.1597 0.0000000023395400635 ***
## idcode61     -0.672604534  0.076492264  -8.7931 < 0.00000000000000022 ***
## idcode62     -0.641582540  0.094923414  -6.7589 0.0000000000724280392 ***
## idcode63      0.449173252  0.038303903  11.7266 < 0.00000000000000022 ***
## idcode64     -0.814562507  0.026363980 -30.8968 < 0.00000000000000022 ***
## idcode65      0.026038033  0.033831953   0.7696             0.4421259    
## idcode66     -0.943126151  0.027257104 -34.6011 < 0.00000000000000022 ***
## idcode67      0.053865528  0.086645746   0.6217             0.5346275    
## idcode68     -0.113963868  0.077602409  -1.4686             0.1429997    
## idcode69     -0.419262480  0.074757009  -5.6083 0.0000000463900848314 ***
## idcode70      0.369140622  0.135687907   2.7205             0.0068985 ** 
## idcode71     -0.599546136  0.078966252  -7.5924 0.0000000000004004296 ***
## idcode72     -0.313906359  0.041772446  -7.5147 0.0000000000006614015 ***
## idcode73     -0.299503787  0.077343358  -3.8724             0.0001323 ***
## idcode75     -0.263173450  0.035297425  -7.4559 0.0000000000009644375 ***
## idcode76     -0.359518171  0.088468329  -4.0638 0.0000617129840000873 ***
## idcode77     -0.642858506  0.074933402  -8.5791 0.0000000000000005214 ***
## idcode78      0.206080553  0.078552173   2.6235             0.0091485 ** 
## idcode80      0.149517588  0.154257842   0.9693             0.3331910    
## idcode81     -0.896375515  0.027127868 -33.0426 < 0.00000000000000022 ***
## idcode82      0.000080823  0.058131273   0.0014             0.9988916    
## idcode83     -0.410092411  0.075033413  -5.4655 0.0000000971992505980 ***
## idcode85      0.061787835  0.114827904   0.5381             0.5909134    
## idcode86      0.154981558  0.055975417   2.7687             0.0059776 ** 
## idcode87      0.249873439  0.074624913   3.3484             0.0009165 ***
## idcode89      0.314653024  0.075322797   4.1774 0.0000387055812566108 ***
## idcode92     -0.278239872  0.020582628 -13.5182 < 0.00000000000000022 ***
## idcode93     -0.131229799  0.082706012  -1.5867             0.1136336    
## idcode94     -0.822933804  0.081842758 -10.0551 < 0.00000000000000022 ***
## idcode95     -0.567598658  0.075500350  -7.5178 0.0000000000006481211 ***
## idcode97     -0.630204742  0.050689734 -12.4326 < 0.00000000000000022 ***
## idcode98     -0.311006444  0.033116055  -9.3914 < 0.00000000000000022 ***
## idcode99     -0.164281033  0.077465020  -2.1207             0.0347663 *  
## tenure:union  0.014974113  0.009646023   1.5524             0.1216301    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Ahora para los CI:

m1cis2 <- coefci(m1, parm = coi_indices, vcov = cl_vcov_mat)
all(near(m1cis, m1cis2))
## [1] TRUE
m1cis2
##                     2.5 %     97.5 %
## (Intercept)   1.572314302 2.19264216
## age          -0.006845258 0.01810688
## tenure       -0.001184099 0.04269695
## union        -0.026048678 0.37528746
## tenure:union -0.004008324 0.03395655

tambien cuando queremos calcualr los efectos marginales tenemos que tener en cuenta y pasar la correcta matriz de varainza y covarainza:

library(margins)
margins(m1, vcov = cl_vcov_mat, variables = 'tenure',
        at = list(union = 0:1)) %>%
  summary()
##  factor  union    AME     SE      z      p   lower  upper
##  tenure 0.0000 0.0208 0.0111 1.8617 0.0626 -0.0011 0.0426
##  tenure 1.0000 0.0357 0.0083 4.3089 0.0000  0.0195 0.0520

Sino se ocuparian los se clasico que son mas pequeños:

margins(m1, variables = 'tenure', at = list(union = 0:1)) %>% summary()
##  factor  union    AME     SE      z      p  lower  upper
##  tenure 0.0000 0.0208 0.0070 2.9804 0.0029 0.0071 0.0344
##  tenure 1.0000 0.0357 0.0081 4.3846 0.0000 0.0198 0.0517

lm.cluster de miceadds

library(miceadds)
## Loading required package: mice
## 
## Attaching package: 'mice'
## The following object is masked from 'package:stats':
## 
##     filter
## The following objects are masked from 'package:base':
## 
##     cbind, rbind
## * miceadds 3.11-6 (2021-01-21 11:48:47)
m2 <- lm.cluster(ln_wage ~ age + tenure + union + tenure:union + idcode,
                 cluster = 'idcode',
                 data = nlswork)
m2coeffs <- data.frame(summary(m2))
## R^2= 0.7554 
## 
##                   Estimate  Std. Error       t value
## (Intercept)   1.8824782318 0.157611390  11.943795604
## age           0.0056308093 0.006339777   0.888171456
## tenure        0.0207564257 0.011149190   1.861698113
## union         0.1746193937 0.101970509   1.712449960
## idcode2      -0.6173864975 0.020561516 -30.026312169
## idcode3      -0.5625145462 0.085371134  -6.589048537
## idcode4      -0.3006118668 0.045217648  -6.648109377
## idcode5      -0.1926522258 0.080696916  -2.387355494
## idcode6      -0.4485887031 0.076998894  -5.825911011
## idcode7      -0.8031515238 0.077586665 -10.351669680
## idcode9      -0.4779072019 0.077282396  -6.183907675
## idcode10     -0.7421818310 0.075596923  -9.817619531
## idcode12      0.5378936290 0.121048674   4.443614376
## idcode13      0.2080858136 0.092384493   2.252388982
## idcode14     -0.0416319212 0.125721970  -0.331142769
## idcode15      0.1470966753 0.020541338   7.161007605
## idcode16     -0.0020146378 0.082613716  -0.024386238
## idcode17      0.0935176785 0.074920103   1.248232108
## idcode18     -0.6674031151 0.093227938  -7.158831637
## idcode19     -0.1266455355 0.074677689  -1.695895211
## idcode20     -0.0858254222 0.049171754  -1.745421212
## idcode21     -0.3426033249 0.077785362  -4.404470421
## idcode22      0.0269531073 0.081174846   0.332037678
## idcode23     -0.2805850710 0.099365805  -2.823758853
## idcode24     -0.0160126675 0.084325730  -0.189890648
## idcode25      0.0142308194 0.091684772   0.155214646
## idcode26     -0.3225635895 0.088854385  -3.630249521
## idcode27      0.0660282521 0.074750627   0.883313691
## idcode29     -0.1467049122 0.074887273  -1.959009930
## idcode30     -0.5854172395 0.057001836 -10.270147099
## idcode35     -0.9385858715 0.083696844 -11.214113011
## idcode36     -0.1024783033 0.028295522  -3.621714566
## idcode38     -0.6226392827 0.078059836  -7.976435974
## idcode39      0.0884864695 0.117163740   0.755237662
## idcode40     -0.2582721861 0.077119289  -3.348995936
## idcode41     -0.4450622210 0.074505142  -5.973577236
## idcode43     -0.4836171770 0.081599952  -5.926684590
## idcode44      0.6210824196 0.034820934  17.836466505
## idcode45     -0.3883364879 0.074270196  -5.228698864
## idcode46     -0.9235701833 0.142324438  -6.489189035
## idcode47     -0.6122767314 0.107590090  -5.690828344
## idcode48     -0.6469298240 0.108003964  -5.989871089
## idcode49     -0.4273652745 0.081746977  -5.227903128
## idcode50     -0.1422589127 0.086063210  -1.652958472
## idcode51     -0.5066794589 0.080025891  -6.331444126
## idcode53     -0.5903136238 0.075014679  -7.869308168
## idcode54     -0.5074194859 0.133513671  -3.800505829
## idcode55     -0.5075732336 0.078234045  -6.487881738
## idcode56      0.4521147719 0.086344376   5.236180907
## idcode57     -0.6597407202 0.093034575  -7.091349872
## idcode58     -0.3719162244 0.074128131  -5.017207643
## idcode59     -0.7729992804 0.105322545  -7.339352431
## idcode60     -0.4728934717 0.076772581  -6.159666161
## idcode61     -0.6726045335 0.076492264  -8.793105305
## idcode62     -0.6415825402 0.094923414  -6.758949255
## idcode63      0.4491732522 0.038303903  11.726566055
## idcode64     -0.8145625068 0.026363980 -30.896796343
## idcode65      0.0260380326 0.033831953   0.769628414
## idcode66     -0.9431261515 0.027257104 -34.601113836
## idcode67      0.0538655277 0.086645746   0.621675389
## idcode68     -0.1139638684 0.077602409  -1.468560960
## idcode69     -0.4192624797 0.074757009  -5.608336739
## idcode70      0.3691406220 0.135687907   2.720512310
## idcode71     -0.5995461359 0.078966252  -7.592435047
## idcode72     -0.3139063594 0.041772446  -7.514675017
## idcode73     -0.2995037866 0.077343358  -3.872391799
## idcode75     -0.2631734504 0.035297425  -7.455882337
## idcode76     -0.3595181708 0.088468329  -4.063806503
## idcode77     -0.6428585057 0.074933402  -8.579064670
## idcode78      0.2060805530 0.078552173   2.623486327
## idcode80      0.1495175884 0.154257842   0.969270586
## idcode81     -0.8963755154 0.027127868 -33.042607774
## idcode82      0.0000808235 0.058131273   0.001390362
## idcode83     -0.4100924114 0.075033413  -5.465463938
## idcode85      0.0617878354 0.114827904   0.538090772
## idcode86      0.1549815582 0.055975417   2.768743250
## idcode87      0.2498734388 0.074624913   3.348391671
## idcode89      0.3146530244 0.075322797   4.177394308
## idcode92     -0.2782398716 0.020582628 -13.518189997
## idcode93     -0.1312297987 0.082706012  -1.586702043
## idcode94     -0.8229338037 0.081842758 -10.055059521
## idcode95     -0.5675986576 0.075500350  -7.517828175
## idcode97     -0.6302047417 0.050689734 -12.432591176
## idcode98     -0.3110064438 0.033116055  -9.391409756
## idcode99     -0.1642810328 0.077465020  -2.120712455
## tenure:union  0.0149741126 0.009646023   1.552361330
##                                                                                                                                                                                                                                                                                    Pr(>|t|)
## (Intercept)  0.0000000000000000000000000000000069956385889517171813509199829184126429026946425437927246093750000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## age          0.3744485303545580512363244451989885419607162475585937500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## tenure       0.0626456547335381697116929444746347144246101379394531250000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## union        0.0868137787632022178696544756348885130137205123901367187500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode2      0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000044511640537348327829533789490312756242929026484489440917968750000000000
## idcode3      0.0000000000442654072532547773191277662441223128553247079253196716308593750000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode4      0.0000000000296881663997803942967382284656707724934676662087440490722656250000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode5      0.0169700727727976168057733019622901338152587413787841796875000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode6      0.0000000056801879073699325086929667882884587015723809599876403808593750000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode7      0.0000000000000000000000004112502734267071033760765352838006947422400116920471191406250000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode9      0.0000000006253394964951842311881580194210528134135529398918151855468750000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode10     0.0000000000000000000000945498149586574776828684640861411025980487465858459472656250000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode12     0.0000088460095423274923498793698151132502971449866890907287597656250000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode13     0.0242977014145127900857090708086616359651088714599609375000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode14     0.7405366474253608499722645319707226008176803588867187500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode15     0.0000000000008008620595421087193753473254531627389951609075069427490234375000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode16     0.9805445253423623608313164368155412375926971435546875000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode17     0.2119460689167315115000178593618329614400863647460937500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode18     0.0000000000008136754249687142628855163195567001821473240852355957031250000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode19     0.0899057266875898375424469577410491183400154113769531250000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode20     0.0809115759253638405779085474023304414004087448120117187500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode21     0.0000106042607766650990241356217191892596929392311722040176391601562500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode22     0.7398608082448917544482469565991777926683425903320312500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode23     0.0047464081360812839655016759365935286041349172592163085937500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode24     0.8493948211455371044564799376530572772026062011718750000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode25     0.8766521027324689629978138327714987099170684814453125000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode26     0.0002831473635429200834574825496048333661747165024280548095703125000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode27     0.3770668088397418626911417049996089190244674682617187500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode29     0.0501116239032735680258845434309478150680661201477050781250000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode30     0.0000000000000000000000009605808116674976210140612931098758053849451243877410888671875000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode35     0.0000000000000000000000000000347654255351445783421213131525462358695222064852714538574218750000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode36     0.0002926569272287759897709091827522343010059557855129241943359375000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode38     0.0000000000000015061924358722713909210233484969876371906138956546783447265625000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode39     0.4501063945302778979140612136689014732837677001953125000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode40     0.0008110498548946737081347113651474955986486747860908508300781250000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode41     0.0000000023210682033293047452950086384504402303718961775302886962890625000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode43     0.0000000030911188106227357329409966002486953584593720734119415283203125000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode44     0.0000000000000000000000000000000000000000000000000000000000000000000000368259462170502790577183982456688227102858945727348327636718750000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode45     0.0000001707071504245087291157939590746650537766981869935989379882812500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode46     0.0000000000862996309167986347041234829191580502083525061607360839843750000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode47     0.0000000126424553658071158227741564994417444722785148769617080688476562500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode48     0.0000000021000743039279112821856448922730464801134075969457626342773437500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode49     0.0000001714432920209763564541732838719667597615625709295272827148437500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode50     0.0983393155800409701772224480009754188358783721923828125000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode51     0.0000000002428771230445260134907237148382819214020855724811553955078125000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode53     0.0000000000000035660776415613057996941565042092747717106249183416366577148437500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode54     0.0001444010208985343156860847724232144173583947122097015380859375000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode55     0.0000000000870515652644177093111288456839247373864054679870605468750000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode56     0.0000001639332769486820475315913547831314645009115338325500488281250000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode57     0.0000000000013281010302756145146068494922175773353956174105405807495117187500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode58     0.0000005242789528402487331335685150079939376155380159616470336914062500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode59     0.0000000000002146297607851523583499114833372800603683572262525558471679687500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode60     0.0000000007289845614735232530792541361108760611386969685554504394531250000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode61     0.0000000000000000014548199888991990441373253206691629202396143227815628051757812500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode62     0.0000000000138996062424207967233788507677161305764457210898399353027343750000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode63     0.0000000000000000000000000000000931619843531323772994195975272191390104126185178756713867187500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode64     0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000131888096690078866085344549974323058449954260140657424926758
## idcode65     0.4415203449300767468699291384837124496698379516601562500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode66     0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000002430858
## idcode67     0.5341553374356893257868250657338649034500122070312500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode68     0.1419519103086108735567449912196025252342224121093750000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode69     0.0000000204280272039128866514400006959562006159103475511074066162109375000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode70     0.0065180845036750117982293772911361884325742721557617187500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode71     0.0000000000000313948868983398830497313913845403021696256473660469055175781250000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode72     0.0000000000000570524782977234539435920646077704532217467203736305236816406250000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode73     0.0001077725241374362228534769525545300439262064173817634582519531250000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode75     0.0000000000000892683854856407229313303153261927036510314792394638061523437500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode76     0.0000482788531940988070029208145683696784544736146926879882812500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode77     0.0000000000000000095647693210070526964594606056380143854767084121704101562500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode78     0.0087034938377815555810013137261194060556590557098388671875000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode80     0.3324102012046333820549648407904896885156631469726562500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode81     0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000198649132776368185938214994124
## idcode82     0.9988906521715307240683046074991580098867416381835937500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode83     0.0000000461697651476077833287980922705173725262284278869628906250000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode85     0.5905143827234171638451698527205735445022583007812500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode86     0.0056272961254201451369860720319593383464962244033813476562500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode87     0.0008128205036923120485950478020242826460162177681922912597656250000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode89     0.0000294867604012904960210025850919635104219196364283561706542968750000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode92     0.0000000000000000000000000000000000000000122144133200032019759186802110306757640501018613576889038085937500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode93     0.1125801434146576768258185552440409082919359207153320312500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode94     0.0000000000000000000000087268683414332915470115747957891016994835808873176574707031250000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode95     0.0000000000000556936290304022357453006542016282764961943030357360839843750000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode97     0.0000000000000000000000000000000000173904434244226469745767116314993927517207339406013488769531250000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode98     0.0000000000000000000059202475245503242244626829204889872926287353038787841796875000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## idcode99     0.0339460077102710922059003451067837886512279510498046875000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
## tenure:union 0.1205757908535359385071572546621609944850206375122070312500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
m2coeffs[!startsWith(row.names(m2coeffs), 'idcode'),]
##                 Estimate  Std..Error    t.value
## (Intercept)  1.882478232 0.157611390 11.9437956
## age          0.005630809 0.006339777  0.8881715
## tenure       0.020756426 0.011149190  1.8616981
## union        0.174619394 0.101970509  1.7124500
## tenure:union 0.014974113 0.009646023  1.5523613
##                                               Pr...t..
## (Intercept)  0.000000000000000000000000000000006995639
## age          0.374448530354558051236324445198988541961
## tenure       0.062645654733538169711692944474634714425
## union        0.086813778763202217869654475634888513014
## tenure:union 0.120575790853535938507157254662160994485

Un objeto m devuelto por lm.cluster es una lista que contiene el objeto lm como m$lmres y la matriz de covarianza como m$vcov. Nuevamente, estos objetos deben ser “arrastrados” si queremos hacer más cálculos. Para los márgenes, también necesitamos pasar los datos nuevamente a través de data = nlswork:

margins(m2$lm_res, vcov = m2$vcov, variables = 'tenure',
        at = list(union = 0:1), data = nlswork) %>%
  summary()
##  factor  union    AME     SE      z      p   lower  upper
##  tenure 0.0000 0.0208 0.0111 1.8617 0.0626 -0.0011 0.0426
##  tenure 1.0000 0.0357 0.0083 4.3089 0.0000  0.0195 0.0520

Los resultados son consistentes con los primeros que habiamos analizado

LM.ROBUST DE ESTIMATR

Es más conveniente de usar, pero no se basa en sándwich y lmtest en segundo plano y, en cambio, viene con una implementación propia para el ajuste del modelo y la estimación de covarianza. Se supone que esta implementación es más rápida que los otros enfoques y lo comprobaremos para nuestro ejemplo más adelante.

library(estimatr)
m3 <- lm_robust(ln_wage ~ age + tenure + union + tenure:union + idcode,
                clusters = idcode,
                data = nlswork)
summary(m3)
## 
## Call:
## lm_robust(formula = ln_wage ~ age + tenure + union + tenure:union + 
##     idcode, data = nlswork, clusters = idcode)
## 
## Standard error type:  CR2 
## 
## Coefficients:
##                 Estimate Std. Error    t value                   Pr(>|t|)
## (Intercept)   1.88247823   0.141424  13.310872 0.000000000000085960024705
## age           0.00563081   0.005726   0.983442 0.334189099077033313633933
## tenure        0.02075643   0.010089   2.057345 0.057705708327358107290195
## union         0.17461939   0.093790   1.861817 0.077353319755512253697027
## idcode2      -0.61738650   0.019094 -32.334722 0.000000281016242074282974
## idcode3      -0.56251455   0.078607  -7.156034 0.000000968724165968238520
## idcode4      -0.30061187   0.041448  -7.252725 0.000000211267954701404215
## idcode5      -0.19265223   0.074263  -2.594206 0.018017816498479385239895
## idcode6      -0.44858870   0.070255  -6.385123 0.000003995690780115374078
## idcode7      -0.80315152   0.071358 -11.255211 0.000000001566379152880247
## idcode9      -0.47790720   0.071277  -6.704956 0.000408521368932207281158
## idcode10     -0.74218183   0.069257 -10.716369 0.000000001243137900187216
## idcode12      0.53789363   0.110497   4.867966 0.000361599246116294165739
## idcode13      0.20808581   0.085017   2.447588 0.023714287553875938729941
## idcode14     -0.04163192   0.115232  -0.361288 0.721131282344075641432823
## idcode15      0.14709668   0.018881   7.790738 0.000124566414389170600999
## idcode16     -0.00201464   0.075800  -0.026578 0.979084555331021899604593
## idcode17      0.09351768   0.068702   1.361210 0.190287019093952236570289
## idcode18     -0.66740312   0.085810  -7.777663 0.000000207326038369263688
## idcode19     -0.12664554   0.068739  -1.842424 0.081078053171516339947544
## idcode20     -0.08582542   0.045275  -1.895665 0.074175533232673104833133
## idcode21     -0.34260332   0.071283  -4.806229 0.000129792655333069677356
## idcode22      0.02695311   0.074734   0.360652 0.722540303564294106308807
## idcode23     -0.28058507   0.091404  -3.069721 0.005917574905232716141190
## idcode24     -0.01601267   0.077570  -0.206428 0.838478108923405685004582
## idcode25      0.01423082   0.084407   0.168597 0.867853981887484193791238
## idcode26     -0.32256359   0.081811  -3.942803 0.000857953501503282644179
## idcode27      0.06602825   0.068375   0.965672 0.346735345068670652857179
## idcode29     -0.14670491   0.068382  -2.145376 0.044835688312530402876988
## idcode30     -0.58541724   0.052411 -11.169744 0.000000000448223718274182
## idcode35     -0.93858587   0.077000 -12.189398 0.000000000117397018590807
## idcode36     -0.10247830   0.025503  -4.018338 0.000388835358853529926282
## idcode38     -0.62263928   0.071804  -8.671432 0.000000076599444366664475
## idcode39      0.08848647   0.107536   0.822851 0.419181466669480418119065
## idcode40     -0.25827219   0.070144  -3.682050 0.001396852698703289746146
## idcode41     -0.44506222   0.068236  -6.522351 0.000003501566056804658609
## idcode43     -0.48361718   0.074991  -6.449029 0.000004314797013122251241
## idcode44      0.62108242   0.031538  19.692999 0.000000000000000006524939
## idcode45     -0.38833649   0.068618  -5.659414 0.001277314755757718656506
## idcode46     -0.92357018   0.130247  -7.090923 0.000000254333771877262649
## idcode47     -0.61227673   0.098785  -6.198059 0.000002538831222825474810
## idcode48     -0.64692982   0.099143  -6.525208 0.000001059321776889147183
## idcode49     -0.42736527   0.075243  -5.679838 0.000019130066052246769615
## idcode50     -0.14225891   0.079252  -1.795012 0.088763080103828315148284
## idcode51     -0.50667946   0.073575  -6.886579 0.000001250316916770098504
## idcode53     -0.59031362   0.068655  -8.598222 0.000000048629876230263703
## idcode54     -0.50741949   0.122258  -4.150416 0.000358882656392911958583
## idcode55     -0.50757323   0.071974  -7.052166 0.000001469316212701647154
## idcode56      0.45211477   0.079447   5.690754 0.000014265620372307271412
## idcode57     -0.65974072   0.085639  -7.703703 0.000000232317239182537501
## idcode58     -0.37191622   0.068133  -5.458645 0.000061380980964227815238
## idcode59     -0.77299928   0.096756  -7.989175 0.000000058520977478327791
## idcode60     -0.47289347   0.069840  -6.771138 0.000001339833671061451105
## idcode61     -0.67260453   0.069613  -9.662049 0.000000005862752166266787
## idcode62     -0.64158254   0.087356  -7.344433 0.000000426242018133933483
## idcode63      0.44917325   0.034629  12.970844 0.000000000000438852077234
## idcode64     -0.81456251   0.024183 -33.683522 0.000000000000000000002523
## idcode65      0.02603803   0.031084   0.837664 0.413971584524491009737801
## idcode66     -0.94312615   0.024879 -37.908951 0.000000000000000000802778
## idcode67      0.05386553   0.079751   0.675422 0.507324227884402456645319
## idcode68     -0.11396387   0.071367  -1.596877 0.127701434837349275808194
## idcode69     -0.41926248   0.068383  -6.131083 0.000006537594822158512846
## idcode70      0.36914062   0.123311   2.993574 0.010764557612627413327178
## idcode71     -0.59954614   0.072666  -8.250737 0.000000161848376824041636
## idcode72     -0.31390636   0.038469  -8.159920 0.000000064716321791168658
## idcode73     -0.29950379   0.070907  -4.223904 0.000486906101748865575464
## idcode75     -0.26317345   0.033062  -7.960111 0.001048605589120781000911
## idcode76     -0.35951817   0.081443  -4.414367 0.000284864118837227075359
## idcode77     -0.64285851   0.068738  -9.352292 0.000000025392085878372373
## idcode78      0.20608055   0.072336   2.848948 0.010206593353394450771066
## idcode80      0.14951759   0.140997   1.060429 0.299287670880394762740195
## idcode81     -0.89637552   0.024709 -36.277093 0.000000000183665580490624
## idcode82      0.00008082   0.052750   0.001532 0.998791776611343662395370
## idcode83     -0.41009241   0.068804  -5.960342 0.000012339058546977380650
## idcode85      0.06178784   0.105345   0.586527 0.563121321301010890891803
## idcode86      0.15498156   0.050828   3.049130 0.005077179246036159568356
## idcode87      0.24987344   0.068320   3.657393 0.001700183317601901451799
## idcode89      0.31465302   0.068767   4.575659 0.000184014795980010965491
## idcode92     -0.27823987   0.019266 -14.442038 0.000048595154376371107021
## idcode93     -0.13122980   0.076051  -1.725559 0.099663748198468168904007
## idcode94     -0.82293380   0.075070 -10.962284 0.000000000083981907591783
## idcode95     -0.56759866   0.068946  -8.232499 0.000000069166653449658185
## idcode97     -0.63020474   0.048297 -13.048583 0.000305523768439564891251
## idcode98     -0.31100644   0.030132 -10.321477 0.000000000922328999943985
## idcode99     -0.16428103   0.070420  -2.332868 0.030156038173085507325677
## tenure:union  0.01497411   0.009043   1.655800 0.140620234085015588521017
##                CI Lower  CI Upper     DF
## (Intercept)   1.5930761  2.171880 28.642
## age          -0.0061215  0.017383 26.790
## tenure       -0.0007714  0.042284 14.812
## union        -0.0209918  0.370231 20.049
## idcode2      -0.6656871 -0.569086  5.283
## idcode3      -0.7273116 -0.397718 18.550
## idcode4      -0.3863182 -0.214905 23.174
## idcode5      -0.3483282 -0.036976 18.572
## idcode6      -0.5956313 -0.301546 19.007
## idcode7      -0.9531732 -0.653130 17.828
## idcode9      -0.6497403 -0.306074  6.394
## idcode10     -0.8868561 -0.597508 19.563
## idcode12      0.2977485  0.778039 12.279
## idcode13      0.0307713  0.385400 20.048
## idcode14     -0.2798036  0.196540 23.362
## idcode15      0.1021954  0.191998  6.809
## idcode16     -0.1611084  0.157079 18.250
## idcode17     -0.0508453  0.237881 17.956
## idcode18     -0.8466458 -0.488160 19.581
## idcode19     -0.2705188  0.017228 18.996
## idcode20     -0.1809430  0.009292 18.002
## idcode21     -0.4920192 -0.193187 18.598
## idcode22     -0.1300116  0.183918 18.075
## idcode23     -0.4709303 -0.090240 20.539
## idcode24     -0.1774947  0.145469 20.651
## idcode25     -0.1621560  0.190618 19.456
## idcode26     -0.4936705 -0.151457 19.207
## idcode27     -0.0774041  0.209461 18.392
## idcode29     -0.2896705 -0.003739 19.318
## idcode30     -0.6947067 -0.476128 20.109
## idcode35     -1.0993101 -0.777862 19.799
## idcode36     -0.1546717 -0.050285 28.564
## idcode38     -0.7734974 -0.471781 17.992
## idcode39     -0.1341738  0.311147 22.625
## idcode40     -0.4041975 -0.112347 20.874
## idcode41     -0.5881810 -0.301943 18.432
## idcode43     -0.6410420 -0.326192 18.201
## idcode44      0.5564712  0.685694 27.922
## idcode45     -0.5559585 -0.220714  6.042
## idcode46     -1.1924378 -0.654703 23.914
## idcode47     -0.8166392 -0.407914 22.980
## idcode48     -0.8517946 -0.442065 23.473
## idcode49     -0.5850515 -0.269679 18.647
## idcode50     -0.3082671  0.023749 18.781
## idcode51     -0.6604088 -0.352950 19.496
## idcode53     -0.7338286 -0.446799 19.363
## idcode54     -0.7597206 -0.255118 24.047
## idcode55     -0.6588695 -0.356277 17.861
## idcode56      0.2864096  0.617820 20.036
## idcode57     -0.8385752 -0.480906 19.668
## idcode58     -0.5168836 -0.226949 15.310
## idcode59     -0.9736080 -0.572391 22.096
## idcode60     -0.6185190 -0.327268 20.124
## idcode61     -0.8178559 -0.527353 19.912
## idcode62     -0.8238080 -0.459357 19.994
## idcode63      0.3781049  0.520242 26.882
## idcode64     -0.8645431 -0.764582 23.383
## idcode65     -0.0396004  0.091676 16.809
## idcode66     -0.9953404 -0.890912 18.264
## idcode67     -0.1127453  0.220476 19.537
## idcode68     -0.2639009  0.035973 17.998
## idcode69     -0.5623015 -0.276223 19.175
## idcode70      0.1016514  0.636630 12.498
## idcode71     -0.7522521 -0.446840 17.933
## idcode72     -0.3939593 -0.233853 20.780
## idcode73     -0.4482191 -0.150788 18.440
## idcode75     -0.3529142 -0.173433  4.245
## idcode76     -0.5297418 -0.189295 19.401
## idcode77     -0.7873007 -0.498416 17.950
## idcode78      0.0547675  0.357394 19.163
## idcode80     -0.1411659  0.440201 24.511
## idcode81     -0.9529680 -0.839783  8.327
## idcode82     -0.1095414  0.109703 21.246
## idcode83     -0.5546664 -0.265518 17.960
## idcode85     -0.1558956  0.279471 23.467
## idcode86      0.0507124  0.259251 27.120
## idcode87      0.1067680  0.392979 18.787
## idcode89      0.1711956  0.458110 19.973
## idcode92     -0.3289121 -0.227568  4.649
## idcode93     -0.2897483  0.027289 20.239
## idcode94     -0.9779089 -0.667959 23.886
## idcode95     -0.7113235 -0.423874 20.207
## idcode97     -0.7683820 -0.492027  3.719
## idcode98     -0.3736033 -0.248410 21.370
## idcode99     -0.3111280 -0.017434 20.100
## tenure:union -0.0062982  0.036246  7.187
## 
## Multiple R-squared:  0.7554 ,    Adjusted R-squared:  0.6861 
## F-statistic:    NA on 85 and 81 DF,  p-value: NA

A diferencia de lm, lm_robust permite especificar efectos fijos en un parámetro de fórmula de efectos fijos separado que, según la documentación, debería acelerar el cálculo para muchos tipos de SE. Además, esto limpia la salida resumida ya que no hay más coeficientes FE:

m3fe <- lm_robust(ln_wage ~ age + tenure + union + tenure:union,
                  clusters = idcode,
                  fixed_effects = ~idcode,
                  data = nlswork)
summary(m3fe)
## 
## Call:
## lm_robust(formula = ln_wage ~ age + tenure + union + tenure:union, 
##     data = nlswork, clusters = idcode, fixed_effects = ~idcode)
## 
## Standard error type:  CR2 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)   CI Lower CI Upper     DF
## age          0.005631   0.005726  0.9834  0.33419 -0.0061215  0.01738 26.790
## tenure       0.020756   0.010089  2.0573  0.05771 -0.0007714  0.04228 14.812
## union        0.174619   0.093790  1.8618  0.07735 -0.0209918  0.37023 20.049
## tenure:union 0.014974   0.009043  1.6558  0.14062 -0.0062982  0.03625  7.187
## 
## Multiple R-squared:  0.7554 ,    Adjusted R-squared:  0.6861
## Multiple R-squared (proj. model):  0.199 ,   Adjusted R-squared (proj. model):  -0.02795 
## F-statistic (proj. model): 17.56 on 4 and 81 DF,  p-value: 0.0000000002071

Cuando comparamos los resultados de lm_robust con lm, podemos ver que las estimaciones puntuales son las mismas. Los SE de lm_robust son, como se esperaba, más altos que los SE “clásicos” de lm. Sin embargo, los SE de lm_robust también son un poco más pequeños que los calculados a partir de sandwich :: vcovCL:

m3fe_df <- tidy(m3fe) %>% rename(est.lm_robust = estimate,
                                 se.lm_robust = std.error)
m3fe_df$se.sandwich <-  m1coeffs_cl[coi_indices,2][2:5]
m3fe_df$est.classic <-  m1coeffs_std[coi_indices,1][2:5]
m3fe_df$se.classic <-  m1coeffs_std[coi_indices,2][2:5]
m3fe_df[c('term', 'est.classic', 'est.lm_robust', 'se.classic',
          'se.lm_robust', 'se.sandwich')]
##           term est.classic est.lm_robust  se.classic se.lm_robust se.sandwich
## 1          age 0.005630809   0.005630809 0.003109803  0.005725617 0.006339777
## 2       tenure 0.020756426   0.020756426 0.006964417  0.010088940 0.011149190
## 3        union 0.174619394   0.174619394 0.060646038  0.093789776 0.101970509
## 4 tenure:union 0.014974113   0.014974113 0.009548509  0.009043429 0.009646023
m3fe_df
##           term est.lm_robust se.lm_robust statistic    p.value      conf.low
## 1          age   0.005630809  0.005725617 0.9834415 0.33418910 -0.0061214991
## 2       tenure   0.020756426  0.010088940 2.0573446 0.05770571 -0.0007714321
## 3        union   0.174619394  0.093789776 1.8618169 0.07735332 -0.0209917694
## 4 tenure:union   0.014974113  0.009043429 1.6558004 0.14062023 -0.0062981959
##    conf.high        df outcome se.sandwich est.classic  se.classic
## 1 0.01738312 26.789873 ln_wage 0.006339777 0.005630809 0.003109803
## 2 0.04228428 14.812039 ln_wage 0.011149190 0.020756426 0.006964417
## 3 0.37023056 20.049372 ln_wage 0.101970509 0.174619394 0.060646038
## 4 0.03624642  7.186527 ln_wage 0.009646023 0.014974113 0.009548509

Esto se debe a que lm_robust utiliza de forma predeterminada un estimador de varianza robusto de conglomerados diferente “para corregir pruebas de hipótesis para muestras pequeñas y trabajar con efectos fijos y pesos comúnmente especificados”, como se explica en la viñeta Introducción. Los detalles se pueden encontrar en las notas matemáticas para estimatr.

Al igual que con los resultados de lm y lm.cluster, también podemos estimar los efectos marginales con un objeto de resultado lm_robust. Sin embargo, esto no parece funcionar cuando especificas FEs a través del parámetro fixed_effects como se hizo para m3fe:

margins::margins(m3fe, variables = 'tenure', at = list(union = 0:1)) %>%
  summary()

Con m3 (donde los FE se especificaron directamente en la fórmula del modelo), la estimación de efectos marginales funciona y ni siquiera necesitamos pasar una matriz vcov separada, ya que esta información ya viene con el objeto de resultado lm_robust m3.

margins(m3, variables = 'tenure', at = list(union = 0:1)) %>% summary()
## Warning in sqrt(var_fit): NaNs produced

## Warning in sqrt(var_fit): NaNs produced
##  factor  union    AME     SE      z      p  lower  upper
##  tenure 0.0000 0.0208 0.0101 2.0573 0.0397 0.0010 0.0405
##  tenure 1.0000 0.0357 0.0077 4.6174 0.0000 0.0206 0.0509

Como ya vimosº, lm_robust usa un estimador de varianza diferente al de vcovCL y Stata de sandwich. Sin embargo, al establecer se_type en ‘stata’, podemos replicar estos “SE agrupados en Stata”:

m3stata <- lm_robust(ln_wage ~ age + tenure + union + tenure:union + idcode,
                     clusters = idcode,
                     se_type = 'stata',
                     data = nlswork)
m3stata_se <- tidy(m3stata) %>% pull(std.error)
all(m3stata_se)
## Warning in all(m3stata_se): coercing argument of type 'double' to logical
## [1] TRUE

En resumen, lm_robust es tan conveniente de usar como lm o lm.cluster, pero ofrece una flexibilidad similar a la de sándwich para estimar SE agrupados. Una gran ventaja es que no necesita preocuparse por proporcionar la matriz de covarianza correcta para otras funciones posteriores a la estimación, como los márgenes.

Datos Panel

Ahora veremos como incorporar este tipo de errores cuando estamos trabajando con datos panel

Importamso los datos:

data(crime4)

Vamos a estructurar los datos:

crime4.p <- pdata.frame(crime4, index=c("county", "year") )

Calcualmos nuestra regresion:

reg <- plm(log(crmrte) ~ d83 + d84 + d85 + d86 + d87 + lprbarr + 
             lprbconv + lprbpris + lavgsen + lpolpc, 
           data=crime4.p, model = "fd") 

Para calcular los errores robustos para nuestro modelo de primeras diferencias podemos utilizar coeftest:

coeftest(reg) 
## 
## t test of coefficients:
## 
##               Estimate Std. Error  t value              Pr(>|t|)    
## (Intercept)  0.0077134  0.0170579   0.4522             0.6513193    
## d83         -0.0998658  0.0238953  -4.1793       0.0000342093317 ***
## d84         -0.1478033  0.0412794  -3.5806             0.0003744 ***
## d85         -0.1524144  0.0584000  -2.6098             0.0093152 ** 
## d86         -0.1249001  0.0760042  -1.6433             0.1009087    
## d87         -0.0840734  0.0940003  -0.8944             0.3715175    
## lprbarr     -0.3274942  0.0299801 -10.9237 < 0.00000000000000022 ***
## lprbconv    -0.2381066  0.0182341 -13.0583 < 0.00000000000000022 ***
## lprbpris    -0.1650463  0.0259690  -6.3555       0.0000000004488 ***
## lavgsen     -0.0217606  0.0220909  -0.9850             0.3250506    
## lpolpc       0.3984264  0.0268820  14.8213 < 0.00000000000000022 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

O especificnado la matriz de varianza y covarianza:

coeftest(reg, vcovHC)
## 
## t test of coefficients:
## 
##               Estimate Std. Error t value       Pr(>|t|)    
## (Intercept)  0.0077134  0.0135800  0.5680      0.5702805    
## d83         -0.0998658  0.0219261 -4.5547 0.000006519239 ***
## d84         -0.1478033  0.0355659 -4.1558 0.000037806823 ***
## d85         -0.1524144  0.0505404 -3.0157      0.0026871 ** 
## d86         -0.1249001  0.0623827 -2.0022      0.0457778 *  
## d87         -0.0840734  0.0773366 -1.0871      0.2774836    
## lprbarr     -0.3274942  0.0555908 -5.8912 0.000000006828 ***
## lprbconv    -0.2381066  0.0389969 -6.1058 0.000000001982 ***
## lprbpris    -0.1650463  0.0451128 -3.6585      0.0002791 ***
## lavgsen     -0.0217606  0.0254368 -0.8555      0.3926740    
## lpolpc       0.3984264  0.1014068  3.9290 0.000096617111 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Si estuvieran haciendo una comparacion contra una estimacion de stata

coeftest(reg, vcovHC(reg, type="sss"))
## 
## t test of coefficients:
## 
##               Estimate Std. Error t value       Pr(>|t|)    
## (Intercept)  0.0077134  0.0137846  0.5596      0.5760131    
## d83         -0.0998658  0.0222563 -4.4871 0.000008865291 ***
## d84         -0.1478033  0.0361016 -4.0941 0.000049010535 ***
## d85         -0.1524144  0.0513017 -2.9709      0.0031038 ** 
## d86         -0.1249001  0.0633224 -1.9724      0.0490789 *  
## d87         -0.0840734  0.0785015 -1.0710      0.2846678    
## lprbarr     -0.3274942  0.0564281 -5.8037 0.000000011184 ***
## lprbconv    -0.2381066  0.0395843 -6.0152 0.000000003356 ***
## lprbpris    -0.1650463  0.0457923 -3.6042      0.0003427 ***
## lavgsen     -0.0217606  0.0258200 -0.8428      0.3997305    
## lpolpc       0.3984264  0.1029342  3.8707      0.0001221 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Errores Bootstrap

Como explican Efron y Tibshirani, las explicaciones del bootstrap y otros métodos computacionales involucran las ideas de inferencia estadistica tradicional. Las ideas báscias no han cambiado pero la implementación de estas sí.

Los tres conceptos básicos de estadística son:

  1. Recolección de datos,

  2. resúmenes (o descriptivos) de datos y

  3. inferencia.

Para hacer eso empezaremos con datos clinicos relativos a un experimento que queria ver la relacion entre consumo de aspirina y ataques cardiacos:

trial <- data_frame(patient = 1:22071, 
  group = ifelse(patient <= 11037, "aspirin", "control"), 
  heart_attack = c(rep(TRUE, 104), rep(FALSE, 10933), rep(TRUE, 189), 
      rep(FALSE, 10845)))
## Warning: `data_frame()` was deprecated in tibble 1.1.0.
## Please use `tibble()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
trial
## # A tibble: 22,071 x 3
##    patient group   heart_attack
##      <int> <chr>   <lgl>       
##  1       1 aspirin TRUE        
##  2       2 aspirin TRUE        
##  3       3 aspirin TRUE        
##  4       4 aspirin TRUE        
##  5       5 aspirin TRUE        
##  6       6 aspirin TRUE        
##  7       7 aspirin TRUE        
##  8       8 aspirin TRUE        
##  9       9 aspirin TRUE        
## 10      10 aspirin TRUE        
## # ... with 22,061 more rows

Y calculamos el cociente de las tasas:

summ_stats <- trial %>% 
  group_by(group) %>%
  summarise(
    n_attacks = sum(heart_attack), 
    n_subjects = n(),
    rate_attacks = n_attacks / n_subjects * 100
  )
summ_stats
## # A tibble: 2 x 4
##   group   n_attacks n_subjects rate_attacks
##   <chr>       <int>      <int>        <dbl>
## 1 aspirin       104      11037        0.942
## 2 control       189      11034        1.71

Y el ratio:

ratio_rates <- summ_stats$rate_attacks[1] / summ_stats$rate_attacks[2]
ratio_rates
## [1] 0.550115

Después calculamos 1000 replicaciones bootstrap de \(\hat{\theta^*}\):

boot_ratio_rates <- function(){
  boot_sample <- trial %>%
    group_by(group) %>%
    sample_frac(replace = TRUE)
  rates <- boot_sample %>% 
      summarise(rate_attacks = sum(heart_attack) / n()) %>%
      pull(rate_attacks)
  rates[1] / rates[2]
} 

boot_ratio_rates <- rerun(1000, boot_ratio_rates()) %>% 
  map_dbl(~.x)

Las replicaciones se pueden utilizar para hacer inferencia de los datos. Por ejemplo, podemos estimar el error estándar de \(\theta\):

se <- sd(boot_ratio_rates)
se
## [1] 0.06675378

calculo errores estandard

Tenemos una muestra aleatoria \(\textbf{x}=(x_1,x_2,...,x_n)\)proveniente de una distribución de probabilidad desconocida
\(P\).

  1. Seleccionamos muestras aleatorias con reemplazo de la distribución empírica.

  2. Calculamos la estadística de interés para cada muestra: \(\hat{\theta}=s(\textbf{x})\)

  3. La distribución de la estadística es la distribución bootstrap, y el estimador bootstrap del error estándar es la desviación estándar de la distribución bootstrap.

Ahora vemos eso en mas detalle.Antes preparamos los datos;

library(LaplacesDemon)
## 
## Attaching package: 'LaplacesDemon'
## The following object is masked from 'package:purrr':
## 
##     partial
## The following object is masked from 'package:forecast':
## 
##     is.constant
## The following object is masked from 'package:tseries':
## 
##     read.matrix
## The following object is masked from 'package:car':
## 
##     logit
library(patchwork)
## 
## Attaching package: 'patchwork'
## The following object is masked from 'package:MASS':
## 
##     area
# En este ejemplo la población es una mezcla de normales
pob_plot <- ggplot(data_frame(x = -15:20), aes(x)) +
    stat_function(fun = dnormm, args = list(p = c(0.3, 0.7), mu = c(-2, 8), 
        sigma = c(3.5, 3)), alpha = 0.8) +
    geom_vline(aes(color = "mu", xintercept = 5), alpha = 0.5) +
    scale_colour_manual(values = c('mu' = 'red'), name = '', 
        labels = expression(mu)) +
    labs(x = "", subtitle = "Población", color = "")

samples <- data_frame(sample = 1:3) %>% 
    mutate(
        sims = rerun(3, rnormm(30, p = c(0.3, 0.7), mu = c(-2, 8), 
            sigma = c(3.5, 3))), 
        x_bar = map_dbl(sims, mean))
muestras_plot <- samples %>% 
    unnest() %>% 
    ggplot(aes(x = sims)) +
        geom_histogram(binwidth = 2, alpha = 0.5, fill = "darkgray") +
        geom_vline(xintercept = 5, color = "red", alpha = 0.5) +
        geom_segment(aes(x = x_bar, xend = x_bar, y = 0, yend = 0.8), 
            color = "blue") +
        xlim(-15, 20) +
        facet_wrap(~ sample) +
        geom_text(aes(x = x_bar, y = 0.95, label = "bar(x)"), parse = TRUE, 
            color = "blue", alpha = 0.2, hjust = 1) +
        labs(x = "", subtitle = "Muestras") 
## Warning: `cols` is now required when using unnest().
## Please use `cols = c(sims)`
samples_dist <- data_frame(sample = 1:10000) %>% 
    mutate(
        sims = rerun(10000, rnormm(100, p = c(0.3, 0.7), mu = c(-2, 8), 
            sigma = c(3.5, 3))), 
        mu_hat = map_dbl(sims, mean))
dist_muestral_plot <- ggplot(samples_dist, aes(x = mu_hat)) +
    geom_density(adjust = 2) +
    labs(x = "", subtitle = expression("Distribución muestral de "~hat(mu))) +
    geom_vline(xintercept = 5, color = "red", alpha = 0.5)

(pob_plot | plot_spacer()) / (muestras_plot | dist_muestral_plot) 
## Warning: Removed 3 rows containing missing values (geom_bar).

Creamos una distribucion empirica:

dist_empirica <- data_frame(id = 1:30, obs = samples$sims[[1]])

Analizamos como se distribuye:

dist_empirica_plot <- ggplot(dist_empirica, aes(x = obs)) +
    geom_histogram(binwidth = 2, alpha = 0.5, fill = "darkgray") +
    geom_vline(aes(color = "mu", xintercept = 5), alpha = 0.5) +
    geom_vline(aes(xintercept = samples$x_bar[1], color = "x_bar"), 
        alpha = 0.8, linetype = "dashed") +
    xlim(-15, 20) +
    geom_vline(xintercept = 5, color = "red", alpha = 0.5) +
    labs(x = "", subtitle = expression("Distribución empírica"~P[n])) +
    scale_colour_manual(values = c('mu' = 'red', 'x_bar' = 'blue'), name = '', 
        labels = c(expression(mu), expression(bar(x)))) 

Creamos ahora un muestreo de la misma:

samples_boot <- data_frame(sample_boot = 1:3) %>% 
    mutate(
        sims_boot = rerun(3, sample(dist_empirica$obs, replace = TRUE)), 
        x_bar_boot = map_dbl(sims_boot, mean))

muestras_boot_plot <- samples_boot %>% 
    unnest() %>% 
    ggplot(aes(x = sims_boot)) +
        geom_histogram(binwidth = 2, alpha = 0.5, fill = "darkgray") +
        geom_vline(aes(xintercept = samples$x_bar[1]), color = "blue",
            linetype = "dashed", alpha = 0.8) +
        geom_vline(xintercept = 5, color = "red", alpha = 0.5) +
        geom_segment(aes(x = x_bar_boot, xend = x_bar_boot, y = 0, yend = 0.8), 
            color = "black") +
        xlim(-15, 20) +
        facet_wrap(~ sample_boot) +
        geom_text(aes(x = x_bar_boot, y = 0.95, label = "bar(x)^'*'"), 
            parse = TRUE, color = "black", alpha = 0.3, hjust = 1) +
        labs(x = "", subtitle = "Muestras bootstrap") 
## Warning: `cols` is now required when using unnest().
## Please use `cols = c(sims_boot)`

Vamos a evaluar como se distribuye con boot:

boot_dist <- data_frame(sample = 1:10000) %>% 
    mutate(
        sims_boot = rerun(10000, sample(dist_empirica$obs, replace = TRUE)), 
        mu_hat_star = map_dbl(sims_boot, mean))
boot_muestral_plot <- ggplot(boot_dist, aes(x = mu_hat_star)) +
    geom_histogram(alpha = 0.5, fill = "darkgray") +
    labs(x = "", 
        subtitle = expression("Distribución bootstrap de "~hat(mu)^'*')) +
    geom_vline(xintercept = 5, color = "red", alpha = 0.5) +
    geom_vline(aes(xintercept = samples$x_bar[1]), color = "blue", 
        linetype = "dashed", alpha = 0.8)

(dist_empirica_plot | plot_spacer()) / (muestras_boot_plot | boot_muestral_plot) 
## Warning: Removed 1 rows containing missing values (geom_bar).
## Warning: Removed 3 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

funciones especificas

Importamos e instalamos la paqueteria para Bootstrap

library(boot)
## 
## Attaching package: 'boot'
## The following object is masked from 'package:LaplacesDemon':
## 
##     logit
## The following object is masked from 'package:survival':
## 
##     aml
## The following object is masked from 'package:car':
## 
##     logit

Ejemplo I

Importmaos la base

hsb2 <- read.table("https://stats.idre.ucla.edu/stat/data/hsb2.csv", sep=",", header=T)

Antes de usar boot tenemos que definir una funcion que regrese el estadistico de nuestro interes.

En este caso, nos interesa la correlacion entre el coeficiente write y el coeficiente math

fc <- function(d, i){
  d2 <- d[i,]
  return(cor(d2$write, d2$math))
}

Ahora si podemos usar el comando boot que es compuesto por los siguientes argumentos:

  1. dataset name
  2. function fc
  3. numeros muestras de bootstrap

Ponemos la semilla para tener los mismos resultados

set.seed(626)
bootcorr <- boot(hsb2, fc, R=500)
bootcorr
## 
## ORDINARY NONPARAMETRIC BOOTSTRAP
## 
## 
## Call:
## boot(data = hsb2, statistic = fc, R = 500)
## 
## 
## Bootstrap Statistics :
##      original      bias    std. error
## t1* 0.6174493 -0.00388133   0.0430155

Podemos obtener informacion adicional al respecto:

summary(bootcorr)
##     R original   bootBias   bootSE bootMed
## 1 500  0.61745 -0.0038813 0.043015 0.61799

O:

class(bootcorr)
## [1] "boot"

Yel relativo plot:

plot(bootcorr)

Para obtener los intervalos de confianza tengo una funcion especifica:

boot.ci(boot.out = bootcorr, type = c("norm", "basic", "perc", "bca"))

Ejemplo II

Importamos los datos:

data(city)

Definimos, como antes, la funcion. Notamos que aqui esta definiendo una funcion distinta. Esto se debe a que en este ejercicio el interes era estimar el ratio entre las medias:

ratio <- function(d, w) sum(d$x * w)/sum(d$u * w)

Utilizamos la funcion Boot:

bootrat <- boot(city, ratio, R=999, stype="w")

Obtenemos nuestras estadisticas descriptivas:

class(bootrat)
## [1] "boot"

Y el plot:

plot(bootrat)