1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368
|
* ---------------------------------------------------------------------------
* CFPS 2018 family-economy file: build the CX flag and the four-region label,
* then save a working copy.
* ---------------------------------------------------------------------------
cd "C:\Users\wugan\Desktop\DATA\CFPS中国家庭追踪调查"
use "CFPS2018\cfps2018famecon", clear
ssc install ereplace, replace

* Look at the raw codes of fa1 / fa101 before recoding them.
tabstat fa1, stat(mean) by(fa1)
tabstat fa101, stat(mean) by(fa101)

* CX = 1 when fa1 is 1, CX = 0 when fa1 is 5. When fa1 is 77 or -8 the
* answer is taken from fa101 instead. Anything else stays missing.
* NOTE(review): presumably an urban/rural indicator - confirm against the codebook.
gen CX = 1 if fa1 == 1 | (inlist(fa1, 77, -8) & fa101 == 1)
replace CX = 0 if fa1 == 5 | (inlist(fa1, 77, -8) & fa101 == 5)

tab fl10

* A bare `gen` with no variable or expression stood here. Stata rejects it
* and stops the do-file, so it has been removed.
* TODO: restore the intended variable definition if one was lost.

* Region label derived from the numeric province code.
gen diqu = ""
label variable diqu "地区"
replace diqu = "东部地区" if inlist(pcode, 11, 12, 13, 31, 32, 33, 35, 37, 44, 46)
replace diqu = "中部地区" if inlist(pcode, 14, 34, 36, 41, 42, 43)
replace diqu = "西部地区" if inlist(pcode, 15, 45, 50, 51, 52, 53, 54, 61, 62, 63, 64, 65, 66)
replace diqu = "东北地区" if inlist(pcode, 21, 22, 23)

* Diagnostic left as a template: X is not created anywhere in this section.
* `capture noisily` still prints the table (or the error) but never blocks the save.
* NOTE(review): replace X with the variable that should be inspected.
capture noisily tab X if X > 999999 & X < 1000001

save "数据清洗\cfps2018cfps2018famecon", replace
* ---------------------------------------------------------------------------
* CMDS 2017: compute the household mean of age and years of schooling from
* the q101 member roster and merge both back onto the household file.
* ---------------------------------------------------------------------------
cd "C:\Users\wugan\Desktop\DATA"
use "CMDS流动人口动态监测数据\2017\2017_Migrant_A_desensitized.dta", clear
keep newID C1 C2 C3 C6 C7 Q100 q101*

* Move the member index (1-10) to the end of the names that carry it in the
* middle, so that every roster variable follows the stub+index pattern
* required by -reshape long-.
forvalues i = 1/10 {
    rename (q101c`i'y q101c`i'm q101j`i'a q101j`i'b q101m`i'y q101m`i'm) ///
           (q101cy`i' q101cm`i' q101ja`i' q101jb`i' q101my`i' q101mm`i')
}

* One row per household member; `list` holds the member index.
reshape long q101id q101a q101b q101cy q101cm q101d q101e q101f q101g q101h ///
             q101i q101ja q101jb q101k q101l q101my q101mm q101n, i(newID) j(list)
drop if q101a == .

* NOTE(review): assumes q101cy is the birth year - confirm against the questionnaire.
gen age = 2017 - q101cy
label var age "年龄"

* Map the q101e category codes 1-7 to a number of years; other codes stay missing.
tabstat q101e, stat(mean n) by(q101e)
gen edu = .
replace edu = 0  if q101e == 1
replace edu = 6  if q101e == 2
replace edu = 9  if q101e == 3
replace edu = 12 if q101e == 4
replace edu = 15 if q101e == 5
replace edu = 16 if q101e == 6
replace edu = 19 if q101e == 7

* labor = 0 outside ages 18-60 (a missing age compares as > 60, so it is 0 too).
gen labor = 1
replace labor = 0 if age > 60 | age < 18
* `health` is not among the variables kept above, so the unguarded
* `replace ... if health==7` stopped the do-file with "variable not found".
* Apply the rule only when the variable is actually present.
capture confirm variable health, exact
if _rc == 0 {
    replace labor = 0 if health == 7
}

* Replace each member's value by the household mean, then keep one row per household.
ssc install ereplace, replace
ereplace age = mean(age), by(newID)
ereplace edu = mean(edu), by(newID)

duplicates drop newID, force
keep newID age edu
save "数据清洗\CMDS2017_merge.dta", replace

* Attach the household means to the full file and remove the temporary file.
use "CMDS流动人口动态监测数据\2017\2017_Migrant_A_desensitized.dta", clear
merge 1:1 newID using "数据清洗\CMDS2017_merge.dta", nogen
save "数据清洗\2017_Migrant_A", replace
erase "数据清洗\CMDS2017_merge.dta"
* ---------------------------------------------------------------------------
* CFPS 2018: attach the family-economy record to every person record that
* shares the same fid18, keeping matched rows only.
* ---------------------------------------------------------------------------
cd "C:\Users\wugan\Desktop\DATA"

use "CFPS中国家庭追踪调查\CFPS2018\cfps2018famecon", clear

* keep(match) retains only rows found in both files and nogenerate
* suppresses the _merge indicator, so no clean-up step is needed afterwards.
merge 1:m fid18 using "CFPS中国家庭追踪调查\CFPS2018\cfps2018person", ///
    keep(match) nogenerate

save "数据清洗\cfps2018famecon", replace
* ---------------------------------------------------------------------------
* CLDS 2016: convert the files in the working folder from GB18030 to Unicode.
* ---------------------------------------------------------------------------
* Start with no dataset in memory.
clear
cd "C:\Users\wugan\Desktop\DATA\转码\CLDS2016(Stata13)"

* Declare the source encoding, report which files need translation, then translate.
unicode encoding set gb18030
unicode analyze *
* `invalid` lets the translation proceed for files containing characters
* that are not valid in the declared encoding.
unicode translate *, invalid
* ---------------------------------------------------------------------------
* CFPS 2014/2016/2018: extract the family id and expenditure variables from
* each wave and stack the three extracts into one file.
* ---------------------------------------------------------------------------
cd "C:\Users\wugan\Desktop\DATA\CFPS中国家庭追踪调查"

forvalues wave = 14(2)18 {
    use "CFPS20`wave'\cfps20`wave'famecon.dta", clear
    keep fid* daily dress eec eptran epwelf expense food house med mortage other pce trco
    save "数据清洗\cfps20`wave'", replace
}

* The original loaded cfps2014 here and immediately replaced it with
* cfps2018; that dead load is dropped. 2014 still enters via -append- below.
use "数据清洗\cfps2018", clear
append using "数据清洗\cfps2016"
append using "数据清洗\cfps2014"
save "数据清洗\cfps14-18", replace
* ---------------------------------------------------------------------------
* CFPS 2014/2016/2018: for each wave
*   1. flag CX (1/2) and compute per-capita income dis_inc,
*   2. assign households to five income groups (type5) within pcode x CX,
*   3. compute the mean of each expenditure item per pcode x CX x type5 cell,
*   4. collapse households to one row per fid14 and export the results.
* ---------------------------------------------------------------------------

* -ereplace- comes from SSC and is used below; install it only when missing.
capture which ereplace
if _rc {
    ssc install ereplace
}

* Helper: (re)build type5 from the current dis_inc.
* Within each pcode, separately for CX==1 and CX==2 and only over select==1,
* it stores the minimum, the 20/40/60/80th percentiles and the maximum of
* dis_inc in p0_j ... p100_j, then sets type5 = 1..5 by the interval dis_inc
* falls in (the top interval is closed on the right).
* Any earlier type5 and p*_* variables are dropped first.
capture program drop cfps_assign_type5
program define cfps_assign_type5
    capture drop type5
    capture drop p*_*
    gen type5 = .
    forvalues j = 1/2 {
        egen p0_`j' = min(dis_inc) if select == 1 & CX == `j', by(pcode)
        forvalues i = 20(20)80 {
            egen p`i'_`j' = pctile(dis_inc) if select == 1 & CX == `j', p(`i') by(pcode)
        }
        egen p100_`j' = max(dis_inc) if select == 1 & CX == `j', by(pcode)
        * Rows outside this CX have missing cut-offs, so no condition below holds for them.
        replace type5 = 1 if dis_inc >= p0_`j'  & dis_inc <  p20_`j'
        replace type5 = 2 if dis_inc >= p20_`j' & dis_inc <  p40_`j'
        replace type5 = 3 if dis_inc >= p40_`j' & dis_inc <  p60_`j'
        replace type5 = 4 if dis_inc >= p60_`j' & dis_inc <  p80_`j'
        replace type5 = 5 if dis_inc >= p80_`j' & dis_inc <= p100_`j'
    }
end

cd "/~/CFPS分省分收入组各项支出/计算消费支出"

* The eleven expenditure items processed throughout the loop.
local items "daily dress eec food house med trco other eptran epwelf mortage"

forvalues yy = 14(2)18 {
    use "校正支出数据/CFPS/CFPS20`yy'", clear

    * --- CX flag: 1 from fa1/fa101 == 1, 2 from fa1/fa101 == 5 (later rule wins) ---
    gen CX = .
    replace CX = 1 if fa1 == 1 | fa101 == 1
    replace CX = 2 if fa1 == 5 | fa101 == 5
    label var CX "城乡识别码:1=城镇;2=农村"

    * --- income: sum of the five components, then divided by family size ---
    egen disposable_inc = rowtotal(fwage_1 foperate_1 fproperty_1 ftransfer_1 felse_1)
    gen dis_inc = disposable_inc / familysize`yy'

    tostring provcd`yy', g(pcode)

    * --- sample: drop missing/excluded province codes and rows without CX ---
    gen select = 1
    replace select = 0 if inlist(pcode, ".", "15", "46", "54", "63", "64", "65") | CX == .

    * Missing items count as zero inside the sample and are blanked outside it.
    foreach v of local items {
        replace `v' = 0 if `v' == .
        replace `v' = . if select == 0
    }
    replace dis_inc = 0 if dis_inc == .

    * --- first pass of income groups ---
    cfps_assign_type5

    * Where the computed group is 3 or 4 below fincome1_per_p, use fincome1 as
    * the household income instead. A missing difference matches neither value.
    replace disposable_inc = fincome1 ///
        if inlist(type5 - fincome1_per_p, -3, -4) & fincome1 != .

    replace dis_inc = disposable_inc / familysize`yy'
    replace dis_inc = 0 if dis_inc == .

    * --- second pass of income groups on the corrected income ---
    cfps_assign_type5

    * --- cell id: pcode + "0" + CX + "0" + type5 ---
    tostring CX, replace
    tostring type5, replace
    gen group = pcode + "0" + CX + "0" + type5

    * Cell mean of every item over the selected households. One by-cell egen
    * per item replaces the per-cell conditional -ereplace- passes, so the
    * result does not depend on how -ereplace- treats rows outside its -if-.
    foreach v of local items {
        egen new_`v' = mean(`v') if select == 1 & type5 != ".", by(pcode CX type5)
    }

    keep if select == 1
    sort group
    keep fid`yy' fid14 pcode CX type5 group new_* `items' ///
         familysize`yy' disposable_inc dis_inc fincome1_per_p
    destring pcode CX type5 group, replace
    save "校正支出数据/使用数据/1028-CFPS20`yy'", replace

    * --- readable labels for CX and type5 ---
    tostring CX type5, replace
    replace CX = "城镇" if CX == "1"
    replace CX = "农村" if CX == "2"

    replace type5 = "低收入户(20%)"   if type5 == "1"
    replace type5 = "中间偏下户(20%)" if type5 == "2"
    replace type5 = "中间收入户(20%)" if type5 == "3"
    replace type5 = "中间偏上户(20%)" if type5 == "4"
    replace type5 = "高收入户(20%)"   if type5 == "5"

    * Bring in the province lookup; only matched rows are kept.
    merge m:1 pcode using "/~/CFPS分省分收入组各项支出/计算消费支出/城乡分组消费数据/省份识别码", ///
        keep(match) nogenerate

    * --- household total expense and per-cell statistics ---
    egen expense = rowtotal(`items')
    gen count = 1
    egen count_num = total(count), by(group)
    drop count
    egen new_expense = mean(expense), by(group)
    egen min_expense = min(expense), by(group)
    egen max_expense = max(expense), by(group)

    * --- add up all households that share the same fid14 ---
    egen expense_`yy' = total(expense), by(fid14)
    foreach v in `items' familysize`yy' {
        ereplace `v' = total(`v'), by(fid14)
    }

    * Keep, per fid14, the row with the largest household expense; fenhu flags
    * fid14 values that had two or more rows.
    bysort fid14 (expense): gen fenhu`yy' = (_N >= 2)
    by fid14: keep if _n == _N

    * The household-level expense is no longer needed. Give the fid14 total the
    * plain name `expense` explicitly: the original dropped `expense` and then
    * kept referring to it, which only worked through variable abbreviation of
    * expense_`yy' and fails under `set varabbrev off`.
    drop expense
    rename expense_`yy' expense

    label var type5 "收入组划分"
    label var group "分类识别码(省码+城乡识别码+收入组划分码)"
    label var pcode "省份识别码"
    label var province "省份名称"
    label var daily "原-家庭设备及日用品"
    label var dress "原-衣着"
    label var eec "原-文化教育娱乐"
    label var food "原-食品"
    label var house "原-居住(房租、水电等,不含房贷)"
    label var med "原-医疗保健"
    label var trco "原-交通通讯"
    label var other "原-其他消费支出"
    label var eptran "原-转移性支出"
    label var epwelf "原-保障性支出"
    label var mortage "原-建房购房贷款支出"
    label var expense "原-家庭总支出"
    label var new_daily "1.家庭设备及日用品"
    label var new_dress "2.衣着"
    label var new_eec "3.文化教育娱乐"
    label var new_food "4.食品"
    label var new_house "5.居住(房租、水电等,不含房贷)"
    label var new_med "6.医疗保健"
    label var new_trco "7.交通通讯"
    label var new_other "8.其他消费支出"
    label var new_eptran "9.转移性支出"
    label var new_epwelf "10.保障性支出"
    label var new_mortage "11.建房购房贷款支出"
    label var new_expense "家庭总支出(分组均值)"
    label var count_num "样本数"
    label var fenhu`yy' "是否分户(1=是,0=否)"
    label var min_expense "家庭总支出(分组最小值)"
    label var max_expense "家庭总支出(分组最大值)"

    order fid`yy' province pcode CX type5 group `items' expense new_* fenhu`yy'
    sort group

    * --- household-level output ---
    save "校正支出数据/校正20`yy'", replace
    export excel using "校正支出数据/三期数据表.xlsx", ///
        sheet("CFPS20`yy'", modify) firstrow(varlabels)

    * --- one row per cell for the group-statistics sheet ---
    bysort group: keep if _n == 1
    drop fid`yy'
    export excel using "校正支出数据/三期数据表.xlsx", ///
        sheet("20`yy'分组统计", modify) firstrow(varlabels)

    * --- final per-wave file: keep the core variables and suffix them with the wave ---
    use "校正支出数据/校正20`yy'", clear
    keep fid`yy' fid14 province pcode CX type5 group `items' expense ///
         new_expense count_num min_expense max_expense familysize`yy' fenhu`yy'
    local suffixed "province pcode CX type5 group `items' expense new_expense count_num min_expense max_expense"
    foreach v of local suffixed {
        rename `v' `v'`yy'
    }
    save "校正支出数据/校正20`yy'", replace
}
|