# Aggregate census education data (estimates and margins of error)
# Data for this exercise can be downloaded here: ACS2010 (read below as "ACS2010.csv").
# Read data file.
# In the columns used below, names ending in "E" are treated as estimates and
# names ending in "M" as the matching margins of error (MoE).
dat <- read.csv("ACS2010.csv", header = TRUE)

# Columns are looked up in `dat` explicitly (with() / data =) instead of via
# attach(): nothing is left on the search path if a later step fails, and
# unrelated global variables cannot silently mask a column.

# Verify sums: check that pop = M + F, i.e. that B15002_001E equals
# B15002_002E + B15002_019E (points should fall on the 1:1 line).
plot(B15002_001E ~ I(B15002_002E + B15002_019E), data = dat)

# Sums of errors are the sqrt of the sums of the SE squared.
# SE can be computed from MoE / 1.645.
# src: 'American Community Survey Accuracy of the Data (2009)'
moe_z <- 1.645 # divisor converting a published MoE into a standard error

# Compute percentage with bachelors degree (or greater)

# Estimate: sum of the eight B15002 estimate columns used for the numerator.
numE <- with(
  dat,
  B15002_015E + B15002_016E + B15002_017E + B15002_018E +
    B15002_032E + B15002_033E + B15002_034E + B15002_035E
)

# Standard error of that sum, built from the matching MoE columns.
numSE <- with(
  dat,
  sqrt(
    (B15002_015M^2 + B15002_016M^2 + B15002_017M^2 + B15002_018M^2 +
       B15002_032M^2 + B15002_033M^2 + B15002_034M^2 + B15002_035M^2) /
      moe_z^2
  )
)

# Percent with Bachelor's or greater (numerator / B15002_001E).
# Rows where B15002_001E is 0 yield NaN here and in the SE below.
percE_E <- with(dat, numE / B15002_001E)

# Compute the two terms under the radical:
#   rad1 = SE(numerator)^2
#   rad2 = (numerator / denominator)^2 * SE(denominator)^2
rad1 <- numSE^2
rad2 <- with(dat, (numE^2 / B15002_001E^2) * (B15002_001M / moe_z)^2)

# Identify records for which the radical (rad1 - rad2) is NOT negative.
A <- rad1 >= rad2
B <- !A # The inverse of A: records where rad1 - rad2 would be negative

# Where the radical term would be negative, use the alternate form of the
# equation (equation for ratios), which adds the two terms instead.
# A and B act as 0/1 masks selecting one form per record.
percE_SE <- with(
  dat,
  1 / B15002_001E * sqrt(A * (rad1 - rad2) + B * (rad1 + rad2))
)
percE_MoE <- percE_SE * moe_z

# Output income (B19301_001E / B19301_001M) and percent with Bachelor's or
# greater. Built as a data frame so each column keeps its own type; selecting
# the columns by name fails loudly if one is missing from the input.
write.csv(
  cbind(
    dat[c("GEOID_DBL", "B19301_001E", "B19301_001M")],
    percE_E = percE_E,
    percE_MoE = percE_MoE
  ),
  "inc_ed.csv",
  na = ""
)