Files changed (1) hide show
  1. Oct CPS original → Oct CPS modified by Freddie&Jieyi +136 -53
Oct CPS original → Oct CPS modified by Freddie&Jieyi RENAMED
@@ -7,41 +7,42 @@
7
  from the October CPS .
8
  2. CREATE AN ANALYSIS FILE age_profile_enrolled_students_cps_oct COLLAPSED BY SURVEY YEAR AND AGE GROUP TO GENERATE TABLES AND FIGURES DESCRIBING AGGREGATE TRENDS IN SCHOOL ATTENDANCE
9
  3. CREATE AN ANALYSIS FILE enrollment_analysis_cps_oct COLLAPSED BY BIRTH COHORT AND 5-year age group TO DESCRIBE AGGREGATE TRENDS IN SCHOOL ATTENDANCE
10
  4. CREATE AN ANALYSIS FILE age_profile_enrolled_CA_students_cps_oct that includes only respondents residing in the state of California to benchmark age enrollment figures from the CSU AND CCCC systems with survey data from the October CPS.
11
  ******NOTE: You will need to change the directory to your own working directory.***/
12
 
13
 
14
  /* 1. CREATES AN ANALYSIS FILE main_analysis_cps_october_76_19 */
15
 
16
 
17
  /***********************************************************************************************************/
18
  /***********************************************************************************************************/
19
  /***************************** Read in October Education Supplement CPS Data from Multiple Years ******************/
20
  /***********************************************************************************************************/
21
  /***********************************************************************************************************/
22
 
23
 
24
  # delimit cr
25
  clear all
26
 
 
27
- cd "C:\Users\cmslo\Box Sync\race project"
28
  *cd "D:\Box Sync\race project"
29
 
30
 
31
  set more off
32
 
33
  clear
34
  quietly infix ///
35
  int year 1-4 ///
36
  long serial 5-9 ///
37
  byte month 10-11 ///
38
  double hwtfinl 12-21 ///
39
  double cpsid 22-35 ///
40
  byte asecflag 36-36 ///
41
  byte hflag 37-37 ///
42
  double asecwth 38-47 ///
43
  byte region 48-49 ///
44
  byte statefip 50-51 ///
45
  byte pernum 52-53 ///
46
  double wtfinl 54-67 ///
47
  double cpsidp 68-81 ///
@@ -3489,271 +3490,349 @@
3489
  label define edstudy_lbl 18 `"No major"', add
3490
  label define edstudy_lbl 19 `"Undecided"', add
3491
  label define edstudy_lbl 96 `"Refusal"', add
3492
  label define edstudy_lbl 97 `"Don't know"', add
3493
  label define edstudy_lbl 98 `"No response"', add
3494
  label define edstudy_lbl 99 `"Not in Universe"', add
3495
  label values edstudy edstudy_lbl
3496
 
3497
  label define qedschus_lbl 1 `"Allocated"'
3498
  label define qedschus_lbl 0 `"Not Allocated"', add
3499
  label values qedschus qedschus_lbl
3500
 
3501
  label define qedschusyrs_lbl 0 `"Not Allocated"'
3502
  label define qedschusyrs_lbl 1 `"Allocated"', add
3503
  label values qedschusyrs qedschusyrs_lbl
3504
 
3505
  label define qedstudy_lbl 1 `"Allocated"'
3506
  label define qedstudy_lbl 0 `"Not Allocated"', add
3507
  label values qedstudy qedstudy_lbl
3508
 
3509
-
3510
  drop if year == 2020
 
3511
  keep if month == 10
 
3512
- keep if bpl == 09900 /*keep US-born people only */
 
 
3513
- /*keep people who live in 50 states: need to go back and grab state and region vars*/
3514
- keep if statefip >= 1 & statefip <= 56
3515
-
3516
-
3517
- /*Data Imputation Flags*/
 
 
 
3518
  #delimit cr
3519
  gen imputed_flag = 0
3520
  foreach var of varlist qage qsex qrace {
3521
  replace imputed_flag = 1 if `var' != 0
3522
  }
3523
- replace imputed_flag = . if year <1982 /*for CPS supplements other than the ASEC, values are imputed for all years 1982 forward*/
3524
-
3525
- /*note about IPW for this sample:
3526
- we preserve sex, race, and age joint distribution. because we are focusing on child outcomes here, we have to contend with differing sampling frames in the CPS with respect to age and attainment.
3527
- attainment questions were only asked of children over 15 in the early years of the cps. thus, unlike other supplements in doing IPW in this supplement, we do not use attainment in the IPW. This is a non-issue in other samples where we focus on adults*/
3528
-
 
 
 
 
 
 
 
 
3529
  egen trq = group(age sex race)
3530
  egen num = sum(imputed_flag*edsuppwt), by(trq)
3531
  egen den = sum(edsuppwt), by(trq)
3532
  gen phat = num/den
3533
  drop if imputed_flag == 1
3534
  gen weight = edsuppwt/(1-phat)
3535
  replace weight = edsuppwt if year <1982
 
3536
 
3537
  /*check sample before drop*/
 
3538
  drop if weight < 0
3539
 
 
3540
  save cps_october_76_19, replace
3541
 
 
3542
 
3543
  # delimit cr
3544
  clear all
3545
 
 
3546
- cd "C:\Users\cmslo\Box Sync\race project"
3547
  *cd "D:\Box Sync\race project"
3548
 
3549
  use "cps_october_76_19"
3550
 
3551
-
3552
  /*Identify if someone is currently enrolled in school: universe for adults starts in 1978*/
3553
 
 
 
 
 
3554
  gen inschool = 0
3555
  replace inschool = 1 if edatt == 1
3556
  replace inschool = . if year < 1978
3557
 
 
 
3558
  gen inschool_ly = 0
3559
  replace inschool_ly = 1 if edattly == 1
3560
- replace inschool_ly = . if year <1987
 
3561
-
 
3562
  gen inhighschool = 0
3563
- replace inhighschool = 1 if edgrade>= 201 & edgrade <=204
3564
  replace inhighschool = . if year < 1978
3565
 
 
 
3566
  gen incollege = 0
3567
- replace incollege = 1 if edgrade>= 301 & edgrade <=304
3568
  replace incollege = . if year < 1978
3569
 
 
 
3570
  gen inyr1college = 0
3571
  replace inyr1college = 1 if edgrade== 301
3572
  replace inyr1college = . if year < 1978
3573
 
 
 
3574
  gen ingradschool = 0
3575
  replace ingradschool = 1 if edgrade>= 401 & edgrade <= 402
3576
  replace ingradschool = . if year < 1978
3577
 
3578
- /*make a postsecondary indicator that includes undergrad or grad school to have something to compare to IPEDS enrollments*/
3579
  gen inpostsec = 0
3580
  replace inpostsec = 1 if incollege == 1 | ingradschool == 1
3581
  replace inpostsec = . if year < 1978
3582
 
 
 
3583
  gen innondegree = 0
3584
- replace innondegree = 1 if edvoca == 2 /*check the crosstab of this one with college*/
3585
  replace innondegree = . if year < 1987
3586
  replace innondegree = . if incollege == 1
3587
  replace innondegree = . if inhighschool == 1
3588
  replace innondegree = . if ingradschool == 1
3589
-
 
 
 
3590
  gen inhighschool_ly = 0
3591
  replace inhighschool_ly = 1 if edgrdly>= 201 & edgrdly <=204
3592
  replace inhighschool_ly = . if year < 1987
3593
 
 
 
3594
  gen incollege_ly = 0
3595
  replace incollege_ly = 1 if edgrdly>= 301 & edgrdly <=304
3596
  replace incollege_ly = . if year < 1987
3597
 
 
 
3598
  gen inyr1college_ly = 0
3599
  replace inyr1college_ly = 1 if edgrdly== 301
3600
  replace inyr1college_ly = . if year < 1987
3601
 
 
 
3602
  gen ingradschool_ly = 0
3603
  replace ingradschool_ly = 1 if edgrdly>= 401 & edgrdly <= 402
3604
  replace ingradschool_ly = . if year < 1988
3605
 
 
 
3606
  gen public = 0
3607
  replace public = 1 if edpupr == 10
3608
  replace public = . if year <1978
3609
 
 
 
3610
  gen private = 0
3611
  replace private = 1 if edpupr == 11
3612
  replace private = . if year <1978
3613
 
 
3614
  gen incollege_public = incollege * public
3615
  replace incollege_public = . if year < 1978
3616
 
 
3617
  gen incollege_private = incollege * private
3618
  replace incollege_private = . if year < 1978
3619
 
 
 
3620
  gen fulltime = 0
3621
  replace fulltime = 1 if edfull == 1
3622
  replace fulltime = . if year < 1978
3623
 
 
 
3624
  gen parttime = 0
3625
  replace parttime = 1 if edfull == 2
3626
  replace parttime = . if year < 1978
3627
 
 
3628
  gen incollege_fulltime = incollege * fulltime
3629
  replace incollege_fulltime = . if year < 1978
3630
 
 
3631
  gen incollege_parttime = incollege * parttime
3632
  replace incollege_parttime = . if year < 1978
3633
 
 
 
3634
  gen two_yr = 0
3635
  replace two_yr = 1 if edtype == 1
3636
  replace two_yr = . if year <1978
3637
 
 
 
3638
  gen four_yr = 0
3639
  replace four_yr = 1 if edtype == 2
3640
  replace four_yr = . if year <1978
3641
 
 
3642
  gen incollege_two = incollege * two_yr
3643
  replace incollege_two = . if year < 1978
3644
 
 
3645
  gen incollege_four = incollege * four_yr
3646
  replace incollege_four = . if year < 1978
3647
 
 
 
3648
  gen nilf = 0
3649
  replace nilf= 1 if empstat >= 30
3650
 
 
3651
  gen incollege_nilf = incollege * nilf
 
 
3652
  gen incollege_nilf_full = incollege * nilf * fulltime
 
 
 
3653
  gen incollege_nilf_part = incollege * nilf * parttime
3654
 
 
 
 
3655
  gen wage_worker = 0
3656
  replace wage_worker = 1 if classwkr >=20 & classwkr <= 28
3657
  replace wage_worker = 0 if nilf == 1
3658
 
 
3659
  gen incollege_wageworker = incollege * wage_worker
3660
 
3661
-
3662
  /*make birth cohort vars*/
3663
-
3664
  gen byear = year - age
3665
-
3666
  gen birth_cohort = 1915 if byear >= 1913 & byear <=1917
3667
  replace birth_cohort = 1920 if byear >= 1918 & byear <=1922
3668
  replace birth_cohort = 1925 if byear >= 1923 & byear <=1927
3669
  replace birth_cohort = 1930 if byear >= 1928 & byear <=1932
3670
  replace birth_cohort = 1935 if byear >= 1933 & byear <=1937
3671
  replace birth_cohort = 1940 if byear >= 1938 & byear <=1942
3672
  replace birth_cohort = 1945 if byear >= 1943 & byear <=1947
3673
  replace birth_cohort = 1950 if byear >= 1948 & byear <=1952
3674
  replace birth_cohort = 1955 if byear >= 1953 & byear <=1957
3675
  replace birth_cohort = 1960 if byear >= 1958 & byear <=1962
3676
  replace birth_cohort = 1965 if byear >= 1963 & byear <=1967
3677
  replace birth_cohort = 1970 if byear >= 1968 & byear <=1972
3678
  replace birth_cohort = 1975 if byear >= 1973 & byear <=1977
3679
  replace birth_cohort = 1980 if byear >= 1978 & byear <=1982
3680
  replace birth_cohort = 1985 if byear >= 1983 & byear <=1987
3681
  replace birth_cohort = 1990 if byear >= 1988 & byear <=1992
3682
 
 
3683
  gen a20 = 0
3684
  replace a20 = 1 if age >= 18 & age <= 24
3685
 
3686
  gen a25 = 0
3687
  replace a25 = 1 if age >= 25 & age <= 29
3688
 
3689
  gen a30 = 0
3690
  replace a30 = 1 if age >= 30 & age <= 34
3691
 
3692
  gen a35 = 0
3693
  replace a35 = 1 if age >= 35 & age <= 39
3694
 
3695
  gen a40 = 0
3696
  replace a40 = 1 if age >= 40 & age <= 44
3697
 
3698
  gen a45 = 0
3699
  replace a45 = 1 if age >= 45 & age <= 49
3700
 
3701
  gen a50 = 0
3702
  replace a50 = 1 if age >= 50 & age <= 54
3703
 
3704
  gen a55 = 0
3705
  replace a55 = 1 if age >= 55 & age <= 59
3706
 
3707
  gen a60 = 0
3708
  replace a60 = 1 if age >= 60 & age <= 64
3709
 
3710
  gen a65 = 0
3711
  replace a65 = 1 if age >= 65 & age <= 69
3712
 
 
3713
  gen a45_55 = 0
3714
  replace a45_55 = 1 if age>=43 & age <=57
3715
 
 
3716
  gen age_5 = 0
3717
- replace age_5 = 20 if a20 == 1
3718
  replace age_5 = 25 if a25 == 1
3719
  replace age_5 = 30 if a30 == 1
3720
  replace age_5 = 35 if a35 == 1
3721
  replace age_5 = 40 if a40 == 1
3722
  replace age_5 = 45 if a45 == 1
3723
  replace age_5 = 50 if a50 == 1
3724
  replace age_5 = 55 if a55 == 1
3725
  replace age_5 = 60 if a60 == 1
3726
  replace age_5 = 65 if a65 == 1
3727
 
 
3728
  gen male = 0
3729
  replace male = 1 if sex == 1
3730
 
3731
  gen female = 0
3732
  replace female = 1 if sex == 2
3733
 
 
 
3734
  gen white = 0
3735
  replace white = 1 if race == 100 & hispan == 0
3736
 
3737
  gen black = 0
3738
  replace black = 1 if race == 200 & hispan == 0
3739
 
 
 
3740
  gen super_state = 0
3741
  replace super_state = 32 if statefip == 1
3742
  replace super_state = 42 if statefip == 2
3743
  replace super_state = 41 if statefip == 4
3744
  replace super_state = 33 if statefip == 5
3745
  replace super_state = 1 if statefip == 6
3746
  replace super_state = 41 if statefip == 8
3747
  replace super_state = 81 if statefip == 9
3748
  replace super_state = 31 if statefip == 10
3749
  replace super_state = 31 if statefip == 11
3750
  replace super_state = 3 if statefip == 12
3751
  replace super_state = 8 if statefip == 13
3752
  replace super_state = 42 if statefip == 15
3753
  replace super_state = 41 if statefip == 16
3754
  replace super_state = 6 if statefip == 17
3755
  replace super_state = 21 if statefip == 18
3756
  replace super_state = 22 if statefip == 19
3757
  replace super_state = 22 if statefip == 20
3758
  replace super_state = 32 if statefip == 21
3759
  replace super_state = 33 if statefip == 22
@@ -3773,92 +3852,95 @@
3773
  replace super_state = 4 if statefip == 36
3774
  replace super_state = 9 if statefip == 37
3775
  replace super_state = 22 if statefip == 38
3776
  replace super_state = 7 if statefip == 39
3777
  replace super_state = 33 if statefip == 40
3778
  replace super_state = 42 if statefip == 41
3779
  replace super_state = 5 if statefip == 42
3780
  replace super_state = 81 if statefip == 44
3781
  replace super_state = 31 if statefip == 45
3782
  replace super_state = 22 if statefip == 46
3783
  replace super_state = 32 if statefip == 47
3784
  replace super_state = 2 if statefip == 48
3785
  replace super_state = 41 if statefip == 49
3786
  replace super_state = 81 if statefip == 50
3787
  replace super_state = 12 if statefip == 51
3788
  replace super_state = 42 if statefip == 53
3789
  replace super_state = 31 if statefip == 54
3790
  replace super_state = 31 if statefip == 55
3791
  replace super_state = 21 if statefip == 56
3792
  assert super_state > 0
3793
- lab def ss 1 "CA" 2 "TX" 3 "FL" 4 "NY" 5 "PA" 6 "IL" 7 "OH" 8 "GA" 9 "NC" 10 "MI" 11 "NJ" 12 "VA" 21 "East North Central" 22 "West North Central" /*
3794
- */ 31 "South Atlantic" 32 "East South Central" 33 "West South Central" 41 "Mountain" 42 "Pacific" 81 "New England"
3795
  lab val super_state ss
3796
 
3797
-
3798
-
3799
  save main_analysis_cps_october_76_19, replace
3800
-
3801
 
3802
  /*2. CREATE AN ANALYSIS FILE age_profile_enrolled_students_cps_oct COLLAPSED BY SURVEY YEAR AND AGE GROUP TO GENERATE TABLES AND FIGURES DESCRIBING AGGREGATE TRENDS IN SCHOOL ATTENDANCE*/
3803
  /*First, we want to describe within a survey year what share of students are adult students -- this is to make analogous pictures to the IPEDS pictures.
3804
  This file creates variables that decompose school attendance within a survey year by age group to answer questions such as: "in a given year, what share of all students attending school were between ages 18 and 24?"*/
3805
 
3806
  # delimit cr
3807
  clear all
3808
 
 
3809
- cd "C:\Users\cmslo\Box Sync\race project"
3810
  *cd "D:\Box Sync\race project"
3811
 
3812
  use "main_analysis_cps_october_76_19"
3813
 
 
3814
- keep if inrange(age, 18, 69)
 
3815
-
 
3816
  gen count = 1
3817
 
3818
- collapse (sum) count inschool inhighschool incollege inyr1college ingradschool inpostsec innondegree incollege_public incollege_private incollege_fulltime incollege_parttime incollege_two incollege_four incollege_nilf incollege_nilf_full incollege_nilf_part incollege_wageworker [pw=weight], by(year age_5)
3819
-
 
 
3820
  drop if age_5 == .
3821
 
3822
- /*make variables that tells us the total number of people in school, in high school, in college, and in grad school in our sample */
3823
 
3824
  sort year
3825
 
3826
  by year: egen inschool_sample = total(inschool)
3827
 
3828
  by year: egen inhighschool_sample = total(inhighschool)
3829
 
3830
  by year: egen incollege_sample = total(incollege)
3831
 
3832
  by year: egen ingradschool_sample = total(ingradschool)
3833
 
3834
  by year: egen inpostsec_sample = total(inpostsec)
3835
 
3836
  by year: egen inpublic_sample = total(incollege_public)
3837
 
3838
  by year: egen inprivate_sample = total(incollege_private)
3839
 
3840
  by year: egen inyr1_sample = total(inyr1college)
3841
 
3842
  by year: egen in4yr_sample = total(incollege_four)
3843
 
3844
- /*make vars that tell us the age profile of all adult students */
3845
 
3846
  gen age_sh_school = inschool / inschool_sample
3847
  format age_sh_school %6.4f
3848
 
3849
  gen age_sh_highschool = inhighschool / inhighschool_sample
3850
  format age_sh_highschool %6.4f
3851
 
3852
  gen age_sh_college = incollege / incollege_sample
3853
  format age_sh_college %6.4f
3854
 
3855
  gen age_sh_public = incollege_public / inpublic_sample
3856
  format age_sh_public %6.4f
3857
 
3858
  gen age_sh_private = incollege_private / inprivate_sample
3859
  format age_sh_private %6.4f
3860
 
3861
  gen age_sh_gradschool = ingradschool / ingradschool_sample
3862
  format age_sh_gradschool %6.4f
3863
 
3864
  gen age_sh_postsec = inpostsec / inpostsec_sample
@@ -3870,57 +3952,56 @@
3870
  gen age_sh_4yr = incollege_four / in4yr_sample
3871
  format age_sh_4yr %6.4f
3872
 
3873
  save age_profile_enrolled_students_cps_oct, replace
3874
 
3875
 
3876
  /*3. CREATE AN ANALYSIS FILE enrollment_analysis_cps_oct COLLAPSED BY BIRTH COHORT AND 5-year age group TO DESCRIBE AGGREGATE TRENDS IN SCHOOL ATTENDANCE*/
3877
 
3878
  /*The previous analysis file decomposes all school enrollment in a survey year by age group to answer questions like: "in a given year, what share of all students attending school were between ages 18 and 24?"
3879
 
3880
  In contrast, this analysis file describes school attendance patterns within a 5-year birth cohort at a specific age to answer questions like: "what share of the 1970 birth cohort attended school between ages 18 and 24?"
3881
 
3882
  The "share variables" answer those types of questions.
3883
 
3884
  Additionally, we also create share variables that are conditioned on attempt, and variables to describe the population of enrolled students.
3885
  */
3886
 
3887
  # delimit cr
3888
  clear all
3889
 
 
3890
- cd "C:\Users\cmslo\Box Sync\race project"
3891
  *cd "D:\Box Sync\race project"
3892
 
3893
  use "main_analysis_cps_october_76_19"
3894
 
3895
- * Limiting age between 23 and 69
3896
- *
3897
  keep if inrange(age, 23, 69)
3898
 
3899
  gen count = 1
3900
-
3901
- collapse (sum) count inschool inhighschool incollege inyr1college ingradschool innondegree incollege_public incollege_private incollege_fulltime incollege_parttime incollege_two incollege_four incollege_nilf incollege_nilf_full incollege_nilf_part incollege_wageworker [pw=weight], by(birth_cohort age_5)
3902
 
3903
  drop if birth_cohort == .
3904
 
3905
- /*make some share variables to describe schooling patterns within age/ birth-cohort*/
3906
-
3907
  gen share_inschool = .
3908
  replace share_inschool = inschool / count if count != 0
3909
  format share_inschool %6.4f
3910
 
3911
  gen share_inhighschool = .
3912
  replace share_inhighschool = inhighschool / count if count != 0
3913
  format share_inhighschool %6.4f
3914
 
3915
  gen share_incollege = .
3916
  replace share_incollege = incollege / count if count != 0
3917
  format share_incollege %6.4f
3918
 
3919
  gen share_ingradschool = .
3920
  replace share_ingradschool = ingradschool / count if count != 0
3921
  format share_ingradschool %6.4f
3922
 
3923
  gen share_innondegree = .
3924
  replace share_innondegree = innondegree / count if count != 0
3925
  format share_innondegree %6.4f
3926
 
@@ -3978,50 +4059,52 @@
3978
 
3979
  gen cond_incollege_nilf_part = .
3980
  replace cond_incollege_nilf_part = incollege_nilf_part / incollege if incollege != 0
3981
  format cond_incollege_nilf_part %6.4f
3982
 
3983
  gen cond_incollege_wagewkr = .
3984
  replace cond_incollege_wagewkr = incollege_wageworker/ incollege if incollege != 0
3985
  format cond_incollege_wagewkr %6.4f
3986
 
3987
  save enrollment_analysis_cps_oct, replace
3988
 
3989
  /*4. CREATE AN ANALYSIS FILE age_profile_enrolled_CA_students_cps_oct that includes only observations from the state of California COLLAPSED BY SURVEY YEAR AND AGE GROUP TO GENERATE TABLES AND FIGURES DESCRIBING AGGREGATE TRENDS IN SCHOOL ATTENDANCE*/
3990
  /*We want to benchmark age enrollment figures from the CSU AND CCCC systems with survey data from the October CPS.
3991
  This file creates variables that decompose school attendance within a survey year for respondents residing in the state of California by age group to answer questions such as: "in a given year, what share of all students attending school were between ages 18 and 24?"
3992
  We focus on attendance in public universities.
3993
  */
3994
 
3995
  # delimit cr
3996
  clear all
3997
 
 
3998
- cd "C:\Users\cmslo\Box Sync\race project"
3999
  *cd "D:\Box Sync\race project"
4000
 
4001
  use "main_analysis_cps_october_76_19"
 
 
4002
  keep if super_state == 1
 
 
4003
  keep if inrange(age, 18, 69)
4004
 
4005
  gen count = 1
4006
-
4007
- collapse (sum) count inschool inhighschool incollege inyr1college ingradschool inpostsec innondegree incollege_public incollege_private incollege_fulltime incollege_parttime incollege_two incollege_four incollege_nilf incollege_nilf_full incollege_nilf_part incollege_wageworker [pw=weight], by(year age_5)
4008
 
4009
  drop if age_5 == .
4010
 
4011
- /*make variables that tells us the total number of people in school, in high school, in college, and in grad school in our sample */
4012
-
4013
  sort year
4014
 
4015
  by year: egen incollege_sample = total(incollege)
4016
  by year: egen incollege_public_sample = total(incollege_public)
4017
 
4018
 
4019
- /*make vars that tell us the age profile of all adult students */
4020
-
4021
  gen age_sh_college = incollege / incollege_sample
4022
  format age_sh_college %6.4f
4023
 
4024
  gen age_sh_college_public = incollege_public / incollege_public_sample
4025
  format age_sh_college_public %6.4f
4026
 
4027
  save age_profile_enrolled_CA_students_cps_oct, replace
7
  from the October CPS .
8
  2. CREATE AN ANALYSIS FILE age_profile_enrolled_students_cps_oct COLLAPSED BY SURVEY YEAR AND AGE GROUP TO GENERATE TABLES AND FIGURES DESCRIBING AGGREGATE TRENDS IN SCHOOL ATTENDANCE
9
  3. CREATE AN ANALYSIS FILE enrollment_analysis_cps_oct COLLAPSED BY BIRTH COHORT AND 5-year age group TO DESCRIBE AGGREGATE TRENDS IN SCHOOL ATTENDANCE
10
  4. CREATE AN ANALYSIS FILE age_profile_enrolled_CA_students_cps_oct that includes only respondents residing in the state of California to benchmark age enrollment figures from the CSU AND CCCC systems with survey data from the October CPS.
11
  ******NOTE: You will need to change the directory to your own working directory.***/
12
 
13
 
14
  /* 1. CREATES AN ANALYSIS FILE main_analysis_cps_october_76_19 */
15
 
16
 
17
  /***********************************************************************************************************/
18
  /***********************************************************************************************************/
19
  /***************************** Read in October Education Supplement CPS Data from Multiple Years ******************/
20
  /***********************************************************************************************************/
21
  /***********************************************************************************************************/
22
 
23
 
24
  # delimit cr
25
  clear all
26
 
27
+ cd "/Users/chenjieyi/Downloads"
28
+ *cd "C:\Users\cmslo\Box Sync\race project"
29
  *cd "D:\Box Sync\race project"
30
 
31
 
32
  set more off
33
 
34
  clear
35
  quietly infix ///
36
  int year 1-4 ///
37
  long serial 5-9 ///
38
  byte month 10-11 ///
39
  double hwtfinl 12-21 ///
40
  double cpsid 22-35 ///
41
  byte asecflag 36-36 ///
42
  byte hflag 37-37 ///
43
  double asecwth 38-47 ///
44
  byte region 48-49 ///
45
  byte statefip 50-51 ///
46
  byte pernum 52-53 ///
47
  double wtfinl 54-67 ///
48
  double cpsidp 68-81 ///
3490
  label define edstudy_lbl 18 `"No major"', add
3491
  label define edstudy_lbl 19 `"Undecided"', add
3492
  label define edstudy_lbl 96 `"Refusal"', add
3493
  label define edstudy_lbl 97 `"Don't know"', add
3494
  label define edstudy_lbl 98 `"No response"', add
3495
  label define edstudy_lbl 99 `"Not in Universe"', add
3496
  label values edstudy edstudy_lbl
3497
 
3498
  label define qedschus_lbl 1 `"Allocated"'
3499
  label define qedschus_lbl 0 `"Not Allocated"', add
3500
  label values qedschus qedschus_lbl
3501
 
3502
  label define qedschusyrs_lbl 0 `"Not Allocated"'
3503
  label define qedschusyrs_lbl 1 `"Allocated"', add
3504
  label values qedschusyrs qedschusyrs_lbl
3505
 
3506
  label define qedstudy_lbl 1 `"Allocated"'
3507
  label define qedstudy_lbl 0 `"Not Allocated"', add
3508
  label values qedstudy qedstudy_lbl
3509
 
3510
+ /*Drop 2020 observations to avoid the COVID-19 shock*/
3511
  drop if year == 2020
3512
+
3513
  keep if month == 10
3514
+
3515
+ /*Keep US-born people only*/
3516
+ keep if bpl == 09900
3517
+
3518
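+ /*Keep only respondents whose state of residence has a FIPS code between 1 and 56 (drops observations with a missing or unidentified U.S. state of residence)*/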
3519
+ keep if statefip >= 1 & statefip <= 56
3520
+
 
3521
+ /*Data Imputation Flags:
3522
+ For CPS supplements other than the ASEC, values are imputed for all years 1982 forward.
3523
+ Keep track of imputation status.
3524
+ */
3525
  #delimit cr
3526
  gen imputed_flag = 0
3527
  foreach var of varlist qage qsex qrace {
3528
  replace imputed_flag = 1 if `var' != 0
3529
  }
3530
+ replace imputed_flag = . if year <1982
3531
+ /*
3532
+ JY: After bpl==09900, all the years before 1994 are dropped.
3533
+ Do we really need to consider 1982, since the sample at this step only covers years 1994-2019?
3534
+ We think the above line of code is not useful.
3535
+ */
3536
+
3537
+ /*Note about IPW for This Sample:
3538
+ We preserve sex, race, and age joint distribution.
3539
+ Because we are focusing on child outcomes here, we have to contend with differing sampling frames in the CPS with respect to age and attainment.
3540
+ Attainment questions were only asked of children over 15 in the early years of the CPS.
3541
+ CPS samples across years have different sampling frames, so we need to use IPW to adjust for them.
3542
+ Thus, unlike other supplements in doing IPW in this supplement, we do not use attainment in the IPW.
3543
+ This is a non-issue in other samples where we focus on adults*/
3544
  egen trq = group(age sex race)
3545
  egen num = sum(imputed_flag*edsuppwt), by(trq)
3546
  egen den = sum(edsuppwt), by(trq)
3547
  gen phat = num/den
3548
  drop if imputed_flag == 1
3549
  gen weight = edsuppwt/(1-phat)
3550
  replace weight = edsuppwt if year <1982
3551
+ /*FYQ: why can edsuppwt be used in the October CPS (it is only available in October)? Will that affect the comparison with the March CPS?*/
3552
 
3553
  /*check sample before drop*/
3554
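+ /*FYQ: in what cases are weights less than zero? (JY: missing values? Note that Stata treats numeric missing values as larger than any number, so "weight < 0" would not drop observations with a missing weight.)*/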
3555
  drop if weight < 0
3556
 
3557
+ /*FYQ+JY: why is the file named "cps_october_76_19" rather than 94_19, given the years available?*/
3558
  save cps_october_76_19, replace
3559
 
3560
+ /*************Prepare for main_analysis file**************/
3561
 
3562
  # delimit cr
3563
  clear all
3564
 
3565
+ cd "/Users/chenjieyi/Downloads"
3566
+ *cd "C:\Users\cmslo\Box Sync\race project"
3567
  *cd "D:\Box Sync\race project"
3568
 
3569
  use "cps_october_76_19"
3570
 
 
3571
  /*Identify if someone is currently enrolled in school: universe for adults starts in 1978*/
3572
 
3573
+ /*FYQ: why do we compare year against 1987 in some places but against 1978 elsewhere?*/
3574
+
3575
+ /*Generate a currently in school indicator
3576
+ EDATT indicates whether a person is attending or enrolled in regular school, 1 means yes*/
3577
  gen inschool = 0
3578
  replace inschool = 1 if edatt == 1
3579
  replace inschool = . if year < 1978
3580
 
3581
+ /*Generate a last year in school indicator
3582
+ EDATTLY indicates whether the person was enrolled in regular school the previous October, and 1 means yes*/
3583
  gen inschool_ly = 0
3584
  replace inschool_ly = 1 if edattly == 1
3585
+ replace inschool_ly = . if year < 1987
3586
+
3587
+ /*Generate a currently in high school indicator
3588
+ EDGRADE identifies the current grade or level of schooling for people currently enrolled in regular school, and 201 to 204 means 9th to 12th grade*/
3589
  gen inhighschool = 0
3590
+ replace inhighschool = 1 if edgrade>= 201 & edgrade <= 204
3591
  replace inhighschool = . if year < 1978
3592
 
3593
+ /*Generate a currently in college indicator
3594
+ EDGRADE 301 to 304 means college year1 to year4*/
3595
  gen incollege = 0
3596
+ replace incollege = 1 if edgrade>= 301 & edgrade <= 304
3597
  replace incollege = . if year < 1978
3598
 
3599
+ /*Generate a currently in college year1 indicator
3600
+ EDGRADE 301 means college year1*/
3601
  gen inyr1college = 0
3602
  replace inyr1college = 1 if edgrade== 301
3603
  replace inyr1college = . if year < 1978
3604
 
3605
+ /*Generate a currently in graduate school indicator
3606
+ EDGRADE 401 to 402 means graduate school year1 to year2+*/
3607
  gen ingradschool = 0
3608
  replace ingradschool = 1 if edgrade>= 401 & edgrade <= 402
3609
  replace ingradschool = . if year < 1978
3610
 
3611
+ /*Generate an indicator for current postsecondary enrollment (undergraduate or graduate school) to have something to compare to IPEDS enrollments*/
3612
  gen inpostsec = 0
3613
  replace inpostsec = 1 if incollege == 1 | ingradschool == 1
3614
  replace inpostsec = . if year < 1978
3615
 
3616
+ /*Generate a currently in non degree program indicator
3617
+ EDVOCA indicates whether or not a person is currently enrolled in a training program other than regular school, and 2 means yes*/
3618
  gen innondegree = 0
3619
+ replace innondegree = 1 if edvoca == 2
3620
  replace innondegree = . if year < 1987
3621
  replace innondegree = . if incollege == 1
3622
  replace innondegree = . if inhighschool == 1
3623
  replace innondegree = . if ingradschool == 1
3624
+ /*FYQ: for those who are enrolled in both a vocational program and a regular high school/college/grad school, why are we setting innondegree to "." instead of "zero"?*/
3625
+
3626
+ /*Generate a last year in high school indicator
3627
+ EDGRDLY identifies which grade the focal person was enrolled in the previous October, and 201 to 204 means 9th to 12th grade*/
3628
  gen inhighschool_ly = 0
3629
  replace inhighschool_ly = 1 if edgrdly>= 201 & edgrdly <=204
3630
  replace inhighschool_ly = . if year < 1987
3631
 
3632
+ /*Generate a last year in college indicator
3633
+ EDGRDLY 301 to 304 means college year1 to year4*/
3634
  gen incollege_ly = 0
3635
  replace incollege_ly = 1 if edgrdly>= 301 & edgrdly <=304
3636
  replace incollege_ly = . if year < 1987
3637
 
3638
+ /*Generate a last year in college year1 indicator
3639
+ EDGRDLY 301 means college year1*/
3640
  gen inyr1college_ly = 0
3641
  replace inyr1college_ly = 1 if edgrdly== 301
3642
  replace inyr1college_ly = . if year < 1987
3643
 
3644
+ /*Generate a last year in graduate school indicator
3645
+ EDGRDLY 401 to 402 means graduate school year1 to year2+*/
3646
  gen ingradschool_ly = 0
3647
  replace ingradschool_ly = 1 if edgrdly>= 401 & edgrdly <= 402
3648
  replace ingradschool_ly = . if year < 1988
3649
 
3650
/* Characteristics of current enrollment. Every variable created in this block
   is set to missing for survey years before 1978 (done in one pass at the end).
   EDPUPR: public vs. private school enrollment; 10 = public, 11 = private.
   EDFULL: part-time / full-time status of people currently attending college
           or graduate school; 1 = full-time, 2 = part-time.
   EDTYPE: 2-year vs. 4-year institution for people enrolled in college;
           1 = 2-year college, 2 = 4-year college. */

/* Public / private, and their interaction with current college enrollment */
gen public = (edpupr == 10)
gen private = (edpupr == 11)
gen incollege_public = incollege * public
gen incollege_private = incollege * private

/* Full-time / part-time, and their interaction with current college enrollment */
gen fulltime = (edfull == 1)
gen parttime = (edfull == 2)
gen incollege_fulltime = incollege * fulltime
gen incollege_parttime = incollege * parttime

/* 2-year / 4-year college, and their interaction with current college enrollment */
gen two_yr = (edtype == 1)
gen four_yr = (edtype == 2)
gen incollege_two = incollege * two_yr
gen incollege_four = incollege * four_yr

/* Blank out every indicator above for years before 1978 */
foreach v of varlist public private incollege_public incollege_private ///
                     fulltime parttime incollege_fulltime incollege_parttime ///
                     two_yr four_yr incollege_two incollege_four {
    replace `v' = . if year < 1978
}
3709
 
3710
/*Generate a not in labor force indicator
EMPSTAT indicates whether persons were part of the labor force, and >= 30 means not in labor force.
Stata orders missing values above every number, so a bare "empstat >= 30" is also true when
EMPSTAT is missing. The !missing() guard keeps such observations at 0 instead of silently
classifying them as not in the labor force (which would also zero out wage_worker below).*/
gen nilf = 0
replace nilf = 1 if empstat >= 30 & !missing(empstat)

/*Generate a currently in college and not in labor force indicator*/
gen incollege_nilf = incollege * nilf

/*Generate a currently full-time in college and not in labor force indicator*/
gen incollege_nilf_full = incollege * nilf * fulltime

/*Generate a currently part-time in college and not in labor force indicator*/
/*FYQ: In what cases would both nilf and parttime both be 1? Isn't that a data error if that occurred?*/
gen incollege_nilf_part = incollege * nilf * parttime

/*Generate a working for wages indicator
CLASSWKR 20-28 means works for wages or salary.
Anyone flagged as not in the labor force is forced back to 0.*/
/*FYQ: Why do we need it? Is it because the data were not perfect so that some people say that they are wage_worker yet they are not in the labor force?*/
gen wage_worker = 0
replace wage_worker = 1 if classwkr >= 20 & classwkr <= 28
replace wage_worker = 0 if nilf == 1

/*Generate a in college and working for wages indicator*/
gen incollege_wageworker = incollege * wage_worker
3734
 
 
3735
  /*make birth cohort vars*/
 
3736
  gen byear = year - age
 
3737
  gen birth_cohort = 1915 if byear >= 1913 & byear <=1917
3738
  replace birth_cohort = 1920 if byear >= 1918 & byear <=1922
3739
  replace birth_cohort = 1925 if byear >= 1923 & byear <=1927
3740
  replace birth_cohort = 1930 if byear >= 1928 & byear <=1932
3741
  replace birth_cohort = 1935 if byear >= 1933 & byear <=1937
3742
  replace birth_cohort = 1940 if byear >= 1938 & byear <=1942
3743
  replace birth_cohort = 1945 if byear >= 1943 & byear <=1947
3744
  replace birth_cohort = 1950 if byear >= 1948 & byear <=1952
3745
  replace birth_cohort = 1955 if byear >= 1953 & byear <=1957
3746
  replace birth_cohort = 1960 if byear >= 1958 & byear <=1962
3747
  replace birth_cohort = 1965 if byear >= 1963 & byear <=1967
3748
  replace birth_cohort = 1970 if byear >= 1968 & byear <=1972
3749
  replace birth_cohort = 1975 if byear >= 1973 & byear <=1977
3750
  replace birth_cohort = 1980 if byear >= 1978 & byear <=1982
3751
  replace birth_cohort = 1985 if byear >= 1983 & byear <=1987
3752
  replace birth_cohort = 1990 if byear >= 1988 & byear <=1992
3753
 
3754
/* Age-group dummies.
   a20 covers ages 18-24; a25 through a65 are 5-year bands
   (a25 = 25-29, a30 = 30-34, ..., a65 = 65-69). */
gen a20 = inrange(age, 18, 24)
forvalues lo = 25(5)65 {
    gen a`lo' = inrange(age, `lo', `lo' + 4)
}

/*FYQ + JY: Why we need a45_55 and why it is from 43 to 57*/
gen a45_55 = inrange(age, 43, 57)

/* 5-year age group variable: takes the value of the matching dummy's label
   (20, 25, ..., 65) and stays 0 for ages outside 18-69.
   Be careful that group 20 is a20 = [18, 24], i.e. seven years wide. */
gen age_5 = 0
forvalues g = 20(5)65 {
    replace age_5 = `g' if a`g' == 1
}
3801
 
3802
/* Binary gender variables: male is SEX == 1, female is SEX == 2 */
gen male = (sex == 1)
gen female = (sex == 2)

/* Binary race variables.
   Our focus is on black and white respondents who are not Hispanic:
   white is RACE == 100 with HISPAN == 0, black is RACE == 200 with HISPAN == 0. */
gen white = (race == 100 & hispan == 0)
gen black = (race == 200 & hispan == 0)
3816
 
3817
/*Create an alternative geography (super_state).
The 12 largest states get assigned their own state code (1-12). The rest get assigned the code of their census division (21-81).
This part handles state FIPS codes 1 through 22, grouped by target code; higher FIPS codes are assigned further down.*/
gen super_state = 0

/* Large states that keep their own code */
replace super_state = 1 if statefip == 6
replace super_state = 3 if statefip == 12
replace super_state = 6 if statefip == 17
replace super_state = 8 if statefip == 13

/* Remaining states, mapped to a census-division code */
replace super_state = 21 if statefip == 18
replace super_state = 22 if inlist(statefip, 19, 20)
replace super_state = 31 if inlist(statefip, 10, 11)
replace super_state = 32 if inlist(statefip, 1, 21)
replace super_state = 33 if inlist(statefip, 5, 22)
replace super_state = 41 if inlist(statefip, 4, 8, 16)
replace super_state = 42 if inlist(statefip, 2, 15)
replace super_state = 81 if statefip == 9
3852
  replace super_state = 4 if statefip == 36
3853
  replace super_state = 9 if statefip == 37
3854
  replace super_state = 22 if statefip == 38
3855
  replace super_state = 7 if statefip == 39
3856
  replace super_state = 33 if statefip == 40
3857
  replace super_state = 42 if statefip == 41
3858
  replace super_state = 5 if statefip == 42
3859
  replace super_state = 81 if statefip == 44
3860
  replace super_state = 31 if statefip == 45
3861
  replace super_state = 22 if statefip == 46
3862
  replace super_state = 32 if statefip == 47
3863
  replace super_state = 2 if statefip == 48
3864
  replace super_state = 41 if statefip == 49
3865
  replace super_state = 81 if statefip == 50
3866
  replace super_state = 12 if statefip == 51
3867
  replace super_state = 42 if statefip == 53
3868
  replace super_state = 31 if statefip == 54
3869
  replace super_state = 31 if statefip == 55
3870
  replace super_state = 21 if statefip == 56
3871
  assert super_state > 0
3872
+ lab def ss 1 "CA" 2 "TX" 3 "FL" 4 "NY" 5 "PA" 6 "IL" 7 "OH" 8 "GA" 9 "NC" 10 "MI" 11 "NJ" 12 "VA" 21 "East North Central" 22 "West North Central" 31 "South Atlantic" 32 "East South Central" 33 "West South Central" 41 "Mountain" 42 "Pacific" 81 "New England"
 
3873
  lab val super_state ss
3874
 
3875
+ /*FYQ+JY: Why we name "main_analysis_cps_october_76_19" but not 94_19 given the years available?*/
 
3876
  save main_analysis_cps_october_76_19, replace
 
3877
 
3878
  /*2. CREATE AN ANALYSIS FILE age_profile_enrolled_students_cps_oct COLLAPSED BY SURVEY YEAR AND AGE GROUP TO GENERATE TABLES AND FIGURES DESCRIBING AGGREGATE TRENDS IN SCHOOL ATTENDANCE*/
3879
  /*First, we want to describe within a survey year what share of students are adult students-- this is to make analagous pictures to the IPEDS pictures.
3880
  This file creates variables that decompose school attendance within a survey year by age group to answer questions such as: "in a given year, what share of all students attending school were between ages 18 and 24?"*/
3881
 
3882
  # delimit cr
3883
  clear all
3884
 
3885
+ cd "/Users/chenjieyi/Downloads"
3886
+ *cd "C:\Users\cmslo\Box Sync\race project"
3887
  *cd "D:\Box Sync\race project"
3888
 
3889
  use "main_analysis_cps_october_76_19"
3890
 
3891
+ /*Limit age between 18 and 69*/
3892
+ keep if inrange(age, 18, 69)
3893
+
3894
+ /*We assign count = 1 to each observation.
3895
+ Within each age-year group, the sum of the count variable tells us the number of observations in that group.*/
3896
  gen count = 1
3897
 
3898
+ /*Count the number of realized (value = 1) obseravtions for each indicators, grouping by survey year and 5-year age group*/
3899
+ collapse (sum) count inschool inhighschool incollege inyr1college ingradschool inpostsec innondegree incollege_public incollege_private incollege_fulltime incollege_parttime incollege_two incollege_four incollege_nilf incollege_nilf_full incollege_nilf_part incollege_wageworker [pw=weight], by(year age_5)
3900
+
3901
+ /*FYQ: Is this necessary? In what cases would we have missing values here?*/
3902
  drop if age_5 == .
3903
 
3904
+ /*Make variables that tell us the total number of people in school, in high school, in college, and in grad school in our sample for each survey year*/
3905
 
3906
  sort year
3907
 
3908
  by year: egen inschool_sample = total(inschool)
3909
 
3910
  by year: egen inhighschool_sample = total(inhighschool)
3911
 
3912
  by year: egen incollege_sample = total(incollege)
3913
 
3914
  by year: egen ingradschool_sample = total(ingradschool)
3915
 
3916
  by year: egen inpostsec_sample = total(inpostsec)
3917
 
3918
  by year: egen inpublic_sample = total(incollege_public)
3919
 
3920
  by year: egen inprivate_sample = total(incollege_private)
3921
 
3922
  by year: egen inyr1_sample = total(inyr1college)
3923
 
3924
  by year: egen in4yr_sample = total(incollege_four)
3925
 
3926
+ /*Make variables that tell us the age profile of all adult students for each survey year*/
3927
 
3928
  gen age_sh_school = inschool / inschool_sample
3929
  format age_sh_school %6.4f
3930
 
3931
  gen age_sh_highschool = inhighschool / inhighschool_sample
3932
  format age_sh_highschool %6.4f
3933
 
3934
  gen age_sh_college = incollege / incollege_sample
3935
  format age_sh_college %6.4f
3936
 
3937
  gen age_sh_public = incollege_public / inpublic_sample
3938
  format age_sh_public %6.4f
3939
 
3940
  gen age_sh_private = incollege_private / inprivate_sample
3941
  format age_sh_private %6.4f
3942
 
3943
  gen age_sh_gradschool = ingradschool / ingradschool_sample
3944
  format age_sh_gradschool %6.4f
3945
 
3946
  gen age_sh_postsec = inpostsec / inpostsec_sample
3952
  gen age_sh_4yr = incollege_four / in4yr_sample
3953
  format age_sh_4yr %6.4f
3954
 
3955
  save age_profile_enrolled_students_cps_oct, replace
3956
 
3957
 
3958
  /*3. CREATE AN ANALYSIS FILE enrollment_analysis_cps_oct COLLAPSED BY BIRTH COHORT AND 5-year age group TO DESCRIBE AGGREGATE TRENDS IN SCHOOL ATTENDANCE*/
3959
 
3960
  /*The previous analysis file decomposes all school enrollment in a survey year by age group to answer questions like: "in a given year, what share of all students attending school were between ages 18 and 24?"
3961
 
3962
  In contrast, this analysis file describes school attendance patterns within a 5-year birth cohort at a specific age to answer questions like: "what share of the 1970 birth cohort attended school between ages 18 and 24?"
3963
 
3964
  The "share variables" answer those types of questions.
3965
 
3966
  Additionally, we also create share variables that are conditioned on attempt, and variables to describe the population of enrolled students.
3967
  */
3968
 
3969
  # delimit cr
3970
  clear all
3971
 
3972
+ cd "/Users/chenjieyi/Downloads"
3973
+ *cd "C:\Users\cmslo\Box Sync\race project"
3974
  *cd "D:\Box Sync\race project"
3975
 
3976
  use "main_analysis_cps_october_76_19"
3977
 
3978
+ /*Limit age between 23 and 69*/
3979
+ /*JY+FYQ: why do we start from 23 here? */
3980
  keep if inrange(age, 23, 69)
3981
 
3982
  gen count = 1
3983
+ collapse (sum) count inschool inhighschool incollege inyr1college ingradschool innondegree incollege_public incollege_private incollege_fulltime incollege_parttime incollege_two incollege_four incollege_nilf incollege_nilf_full incollege_nilf_part incollege_wageworker [pw=weight], by(birth_cohort age_5)
 
3984
 
3985
  drop if birth_cohort == .
3986
 
3987
+ /*Make some share variables to describe schooling patterns within age/ birth-cohort*/
 
3988
  gen share_inschool = .
3989
  replace share_inschool = inschool / count if count != 0
3990
  format share_inschool %6.4f
3991
 
3992
  gen share_inhighschool = .
3993
  replace share_inhighschool = inhighschool / count if count != 0
3994
  format share_inhighschool %6.4f
3995
 
3996
  gen share_incollege = .
3997
  replace share_incollege = incollege / count if count != 0
3998
  format share_incollege %6.4f
3999
 
4000
  gen share_ingradschool = .
4001
  replace share_ingradschool = ingradschool / count if count != 0
4002
  format share_ingradschool %6.4f
4003
 
4004
  gen share_innondegree = .
4005
  replace share_innondegree = innondegree / count if count != 0
4006
  format share_innondegree %6.4f
4007
 
4059
 
4060
  gen cond_incollege_nilf_part = .
4061
  replace cond_incollege_nilf_part = incollege_nilf_part / incollege if incollege != 0
4062
  format cond_incollege_nilf_part %6.4f
4063
 
4064
  gen cond_incollege_wagewkr = .
4065
  replace cond_incollege_wagewkr = incollege_wageworker/ incollege if incollege != 0
4066
  format cond_incollege_wagewkr %6.4f
4067
 
4068
  save enrollment_analysis_cps_oct, replace
4069
 
4070
  /*4. CREATE AN ANALYSIS FILE age_profile_enrolled_CA_students_cps_oct that includes only observations from the state of California COLLAPSED BY SURVEY YEAR AND AGE GROUP TO GENERATE TABLES AND FIGURES DESCRIBING AGGREGATE TRENDS IN SCHOOL ATTENDANCE*/
4071
  /*We want to benchmark age enrollment figures from the CSU AND CCCC systems with survey data from the October CPS.
4072
  This file creates variables that decompose school attendance within a survey year for respondents residing in the state of California by age group to answer questions such as: "in a given year, what share of all students attending school were between ages 18 and 24?"
4073
  We focus on attendance in public universities.
4074
  */
4075
 
4076
  # delimit cr
4077
  clear all
4078
 
4079
+ cd "/Users/chenjieyi/Downloads"
4080
+ *cd "C:\Users\cmslo\Box Sync\race project"
4081
  *cd "D:\Box Sync\race project"
4082
 
4083
  use "main_analysis_cps_october_76_19"
4084
+
4085
+ /*Keep California sample only*/
4086
  keep if super_state == 1
4087
+
4088
+ /*Limit age between 18 and 69*/
4089
  keep if inrange(age, 18, 69)
4090
 
4091
  gen count = 1
4092
+ collapse (sum) count inschool inhighschool incollege inyr1college ingradschool inpostsec innondegree incollege_public incollege_private incollege_fulltime incollege_parttime incollege_two incollege_four incollege_nilf incollege_nilf_full incollege_nilf_part incollege_wageworker [pw=weight], by(year age_5)
 
4093
 
4094
  drop if age_5 == .
4095
 
4096
+ /*Make variables that tells us the total number of people in school, in high school, in college, and in grad school in our sample for each survey year*/
 
4097
  sort year
4098
 
4099
  by year: egen incollege_sample = total(incollege)
4100
  by year: egen incollege_public_sample = total(incollege_public)
4101
 
4102
 
4103
+ /*Make vars that tell us the age profile of all adult students for each survey year*/
 
4104
  gen age_sh_college = incollege / incollege_sample
4105
  format age_sh_college %6.4f
4106
 
4107
  gen age_sh_college_public = incollege_public / incollege_public_sample
4108
  format age_sh_college_public %6.4f
4109
 
4110
  save age_profile_enrolled_CA_students_cps_oct, replace