sas基本统计情况文本笔记
Range极差 mean(1,2,3)平均 std 标准差/*自动导入*/Libname cwb “”;proc import datafile="E:WORK杂七杂八SAS数据库分期乐入池数据明细数据(风险指标监控表&实际还款表)荷包一期_风险指标监控表详细总表&today_date.xls" out=work.risk001 dbms=excel replace; sheet='风险指标监控表总表' getnames=yes; 导入源文件字段名作为SAS数据集的字段名自动导入 %macro chouc(today_date); datafile="C:Users抽查一债权基础池报表&today_date.xls" out=work.chouc01 dbms=excel replace; sheet="债权基础池" run; %mend; %chouc(20170116);导出:proc export data=sashelp.cars outfile= '/folders/myfolders/sasuser.v94/TutorialsPoint/car_data.csv' dbms=csv; run;/4*先排序*/proc sort data=new_cy2;by _COL1 ;run;/*标题*打印*/proc print ;title '员工薪水'RUN;proc print data=cwb.result20170208 label; id package; var overdue_principal_za overdue_principal_fql; label overdue_principal_za="违法停车" overdue_principal_fql="林肯" ;run;proc print data=cwb.result20170208;where principal_sell > 20000000;run;/*统计表格*/1proc tabulate data=work.risk005b;class _COL22; 分类变量var _COL5;分析变量table _COL22 _COL5;列维说明/选项(math chinese)*(mean var)反映男女生数学语文成绩的平均值方差run;2proc tabulate data=work.risk005b;class _COL22;var _COL5 _COL4;table _COL22, (_COL4 _COL5)*(mean max min);run;table _COL22 all, (_COL4 _COL5)*(mean max min); 加个ALL,有求和,图2.也可写为例如table (_COL22 all)*(_COL4)*(mean);如图也可加标签 table (sex all), (math chinese)*(mean std);keylabel mean='平均值' std='标准差' all='总计'label sex='性别' math='数学' chinese='语文'run;-/*复制*强制追加*把B加到A中*而SET是要新建一个的*/proc append base=chouc.chouc01 data=chouc.chouc02 force;/*force强制*/run;/*复制*强制追加*把B加到A中*而SET是要新建一个的*/proc sort data=chouc.chouc01 dupout=chouc.bb noduprecs;/*noduprecs一行完全相同的重复记录*/by descending _COL0 ;run;-/*sql*联合查询*/Proc sql;Select * from aUnion /*多个查询结果合并成一个结果,并去掉重复*/Intersect /*查询公共部分数据*/Except /*把公共部分去掉的数据*/Outer union /*对多个查询结果横向合并*/Select * from b;Quit;-/*去*重复*方法之一*/只显示nameselect name from tb group by name having count(*) > 1显示所有数据select * from name in (select name from tb group by name having count(*) > 1)*2、输出重复的观测值,使用nodupkey选项,注意:使用该选项后直接将原数据集中的重复值删除。注意dupout和out的区别;proc sort data=clasdata dupout=dups3 nodupkey; by name class;run;*输出不重复的观测值。;proc sort data=clasdata out=nodups4 nodupkey; by name class;run;*3、去掉重复的观测值使用nodup选项。;proc sort data =Clasdata out =NODUPS5 nodup; by name class;run ;/*删除*2017年1月3日的数据*/data cwb.result;set cwb.result;if account_time="03jan2017"d then delete;run;SQL 去重复:SELECT DISTINCT "栏位名" FROM "表格名" 举例来说,若要在以下的表格,Store_Information,找出所有不同的店名时,Store_Information表格store_nameSalesDate1$1500Jan-05-19992$250Jan-07-19991$300Jan-08-19993$700Jan-08-1999我們就鍵入,distinctSELECT DISTINCT store_name FROM Store_Information結果:123-/*频率*一般用这个*/proc freq data = asd;tables .变量 run;/*描述统计*例如*/proc summary data=work.risk004 mean max min cv var;class _COL4;var _COL4;output out=aa mean=a max=b min=c cv=d var=e;run;proc print data=work.aa;run;/*分类*频次*/proc means data=cwb.Overdue20170102;var veight;/*变量占比,即在总体中的相对重要程度*/freq _COL4;/*按照_COL4求观测值频度*/class _COL222;/*按_COL22分类*/run;/*MEANS、SUMMARY*功能之外*还可以如例绘制PLOTS茎叶图、盒形图、正态概率图/NORMAL正态分布*/proc univariate data=work.aa plots ;var _col4;run;/*取第2行到第10行的数据*/Data aa;Set aaa (firstobs=2 obs=10)/*取前2行数据*/Data aa;Set aaa (obs=2);Run;-/1*筛选例*/data a(where=(_COL10>1500);set cy;where _COL21>18 and _COL18>1500;where _COL29="上班人群"run;-/2*有7000个人,随机抽样25个*/PROC SURVEYSELECT DATA = a out = new_cy2 method = srs /*固定格式*/n =25 seed = 7000;RUN;/*读取*几条记录*/PROC SQL NUMBER OUTOBS=3;SELECT * FROM WORK.A;QUIT;/3*有1000个人,按性别(sex)分为两层,每层按0.1的比例抽样*/proc surveyselect data=cout=dmethod=srssamprate=0,1seed=25070419; strata sex;run; proc print data=d;run;/*分层抽取*/proc surveyselect /*随机*/data = Work.chouc01out = chouc.chouc01method = srssampsize = (5, 5,15 )/*抽取数据量*顺序与分层例相同*/noprint;strata _col22; /*分层*/run;/*求平均值*/proc sql;select _COL3,mean(loan_amt,repay_amount)from fql;quit;/* N,10分位等 proc means data=wj.yrs_201612 N P1 p10 p25 p50 p75 p90 p99 min max mean sum maxdec=2;/*规定最多输出2位小数*/