/* written paper 1 fall 2007 */
/* Build the pooled sample: males are coded gender = 1, females gender = 2. */
/* clear: without it, re-running this do-file in the same session stops at -use-
   because the data in memory have changed since they were last saved */
use earningsdata_males, clear
gen gender = 1
/* observations appended from the female file are expected to have gender missing,
   which is what the replace below relies on to identify them */
append using earningsdata_females
replace gender = 2 if gender==.
label define genderlbl 1 "male" 2 "female"
label values gender genderlbl
/* inspect the data */
desc
summ
/* question 1: mean, sample size and standard error of the mean of ln_y_, by gender */
/* males: copy the r() results left behind by summarize into constant variables */
summarize ln_y_ if gender == 1
return list
generate mean_male = r(mean)
generate N_male = r(N)
generate sd_mean_male = r(sd) / sqrt(N_male)
/* females: same steps */
summarize ln_y_ if gender == 2
return list
generate mean_female = r(mean)
generate N_female = r(N)
generate sd_mean_female = r(sd) / sqrt(N_female)
/* review the stored statistics */
summarize mean_* sd_* N_*
/* confidence intervals for the respective means (normal approximation) */
/* Critical values are taken from invnormal() instead of the rounded constants
   1.64 and 1.96; 1.64 in particular understates the two-sided 90% critical
   value (about 1.645), which made the 90% intervals slightly too narrow. */
/* display shows the value of each variable in the first observation; the
   variables are constant across observations, so that is the statistic itself */
/* males, 90% */
gen ci_mean_male_lower = mean_male - invnormal(0.95)*sd_mean_male
gen ci_mean_male_upper = mean_male + invnormal(0.95)*sd_mean_male
disp "90% confidence interval for male log earnings is [" ci_mean_male_lower "," ci_mean_male_upper "]"
/* males, 95% */
gen ci_mean_male_lower5 = mean_male - invnormal(0.975)*sd_mean_male
gen ci_mean_male_upper5 = mean_male + invnormal(0.975)*sd_mean_male
disp "95% confidence interval for male log earnings is [" ci_mean_male_lower5 "," ci_mean_male_upper5 "]"
/* females, 90% */
gen ci_mean_female_lower = mean_female - invnormal(0.95)*sd_mean_female
gen ci_mean_female_upper = mean_female + invnormal(0.95)*sd_mean_female
disp "90% confidence interval for female log earnings is [" ci_mean_female_lower "," ci_mean_female_upper "]"
/* females, 95% */
gen ci_mean_female_lower5 = mean_female - invnormal(0.975)*sd_mean_female
gen ci_mean_female_upper5 = mean_female + invnormal(0.975)*sd_mean_female
disp "95% confidence interval for female log earnings is [" ci_mean_female_lower5 "," ci_mean_female_upper5 "]"
/* test whether the means are different */
/* test statistic: difference in means divided by the standard error of that difference */
generate diff = mean_male - mean_female
generate se_diff = sqrt(sd_mean_male^2 + sd_mean_female^2)
generate t = diff / se_diff
display t
/* two-sided p-value of the test of equal means, from the standard normal distribution */
generate p = 2 * normal(-abs(t)) /* norm in stata8 */
display p
/* the p-value is less than 0.01, so we reject the H0 that the means are equal at the 1% level */
/************************************************************************************************************/
/* question 2 */
/* compare exp(mean of log earnings), stored in temp, with the mean of earnings in levels */
egen mean_sample = mean(ln_y_)
gen temp = exp(mean_sample)
gen earnings = exp(ln_y_)
egen mean_earnings = mean(earnings)
/* report both quantities so they can be compared (they were computed but never shown) */
disp "exp(mean of log earnings) = " temp
disp "mean of earnings          = " mean_earnings
/* mean earnings are not equal to exp(mean(ln(earnings))) since exp (like ln) is not a linear operator; */
/* by Jensen's inequality exp(mean(ln y)) <= mean(y) */
/************************************************************************************************************/
/* question 3 */
/* males: regress ln_y_ on s, then keep intercept, slope, slope standard error and R-squared */
regress ln_y_ s if gender == 1
generate alpha_male = _b[_cons]
generate beta_male = _b[s]
generate se_male = _se[s]
ereturn list
generate r2_male = e(r2)
/* scatter plot of the male sample with the fitted line overlaid */
twoway (scatter ln_y_ s if gender == 1) (lfit ln_y_ s if gender == 1), legend(off)
/* females: same regression and stored results */
regress ln_y_ s if gender == 2
generate alpha_female = _b[_cons]
generate beta_female = _b[s]
generate se_female = _se[s]
ereturn list
generate r2_female = e(r2)
/* scatter plot of the female sample with the fitted line overlaid */
twoway (scatter ln_y_ s if gender == 2) (lfit ln_y_ s if gender == 2), legend(off)
/* OLS assumptions are: */
/* (1) E(ui|Xi)=0 */
/* (2) Xi, Yi for i = 1,...,n are iid draws from their joint distribution */
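/* (3) Large outliers are unlikely: 0 < E(Xi^4) < infinity and 0 < E(Yi^4) < infinity */
/* NOTE: text appears to have been lost at this point (the line originally read "01.96"); */
/* the surviving "1.96" is presumably the end of a 5% decision rule such as "reject H0 if |t| > 1.96" */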
/* Tests on the gender-specific schooling coefficients. */
/* NOTE(review): as written, this section read _b[s_female] before any regression
   containing s_female had been estimated, and d_female / s_female are not created
   anywhere in the visible script. The female dummy and interaction are therefore
   created here if they do not already exist, and the pooled regression is run
   BEFORE its coefficients are used. Confirm that the pooled model is the
   regression the t-test was meant to be based on. */
capture confirm variable d_female
if _rc != 0 {
    gen d_female = gender==2
}
capture confirm variable s_female
if _rc != 0 {
    gen s_female = s*d_female
}
gen d_male = gender==1
gen s_male = s*d_male
/* separate intercepts and schooling slopes for each gender, hence no common constant */
reg ln_y_ d_male d_female s_male s_female, noconst
/* t-statistic and two-sided normal p-value for H0: coefficient on s_female = 0 */
gen t_female = _b[s_female]/_se[s_female]
disp t_female
gen p_female = 2*normal(-abs(t_female)) /* norm in stata8 */
disp p_female
/* use an F-test for the two schooling coefficients being the same */
test s_female = s_male
/************************************************************************************************************/
/* question 8 */
/* alternative specifications, each estimated on the male subsample (gender == 1) */
/* experience only */
regress ln_y_ e e_2 if gender == 1
/* private/public sector only, estimated without a constant */
regress ln_y_ public servi if gender == 1, noconstant
/* type of education only: every variable stored from unsp through serv */
regress ln_y_ unsp-serv if gender == 1
/* region only: every variable stored from ostf through finmark */
regress ln_y_ ostf-finmark if gender == 1