The following code reads your data set and reports the missing values for both character and numeric variables.
/*-------------------------------------------------------*
| BUILD A SMALL EXAMPLE DATA SET CONTAINING MISSING VALUES
| (16 ROWS; Y AND VAR1-VAR3 NUMERIC, VAR4-VAR5 CHARACTER)
*-------------------------------------------------------*/
data MYDATA;
   /* List input: a lone period is read as a missing value
      for numeric and character variables alike. */
   input Y
         VAR1
         VAR2
         VAR3
         VAR4 $
         VAR5 $;
   datalines;
1 10 33 4 M A
0 . 21 3 . B
1 30 . 2 M C
1 20 76 1 F A
0 . 24 3 F .
0 20 22 . M A
1 10 . 2 . A
1 . 49 2 F C
0 30 . 2 F B
1 20 59 2 M B
1 20 76 1 . A
0 . 24 3 F C
0 20 22 . F C
1 10 . 2 M .
1 . 49 2 M C
0 30 . 2 F A
;
run;
/*-------------------------------------------------------*
| CHARACTER VARIABLES: ONE-WAY FREQUENCY TABLE PER VARIABLE,
| WITH MISSING VALUES COUNTED AS THEIR OWN LEVEL
*-------------------------------------------------------*/
proc freq data=MYDATA;
   /* _CHARACTER_ selects every character variable in the data set;
      the MISSING option includes missing values in the counts and
      percentages instead of listing them separately. */
   tables _character_ / missing;
run;
/*-------------------------------------------------------*
| ANALYZE AND REPORT MISSING NUMERIC VARIABLES
| RESULT: DATA SET S2 WITH ONE ROW PER VARIABLE AND THE
| COLUMNS _NAME_, NMISS1 (COUNT) AND PMISS (PERCENT)
*-------------------------------------------------------*/
/* COUNT THE ROWS OF THE INPUT DATA SET SO THAT THE PERCENTAGE
   BELOW DOES NOT RELY ON A HARD-CODED SAMPLE SIZE */
PROC SQL NOPRINT;
   SELECT COUNT(*) INTO :NTOTAL FROM MYDATA;
QUIT;
/* NUMBER OF MISSING VALUES PER VARIABLE; AUTONAME NAMES THE
   OUTPUT COLUMNS VAR1_NMiss, VAR2_NMiss, ... */
PROC MEANS DATA = MYDATA NMISS;
   VAR VAR1 VAR2 VAR3 ; /* ENTER NUMERIC VARIABLES OF INTEREST*/
   OUTPUT OUT=T (DROP=_TYPE_ _FREQ_) NMISS=/AUTONAME;
RUN;
/* TURN THE SINGLE SUMMARY ROW INTO ONE ROW PER VARIABLE:
   _NAME_ HOLDS THE STATISTIC NAME, NMISS1 THE MISSING COUNT */
PROC TRANSPOSE DATA = T PREFIX=NMISS OUT=S1;
   VAR _NUMERIC_;
RUN;
DATA S2;
   SET S1;
   /* DENOMINATOR = TOTAL N IN DATA SET, TAKEN FROM THE DATA ITSELF.
      PMISS IS LEFT MISSING WHEN THE DATA SET HAS NO ROWS. */
   IF &NTOTAL > 0 THEN PMISS = NMISS1/&NTOTAL*100;
RUN;
PROC PRINT DATA = S2;
RUN;
(output below)
*--CHARACTER VARIABLES--*
*----NUMERIC VARIABLES ---*
*--CHARACTER VARIABLES--*
VAR4 | Frequency | Percent | Cumulative Frequency | Cumulative Percent |
---|---|---|---|---|
(missing) | 3 | 18.75 | 3 | 18.75 |
F | 7 | 43.75 | 10 | 62.50 |
M | 6 | 37.50 | 16 | 100.00 |
VAR5 | Frequency | Percent | Cumulative Frequency | Cumulative Percent |
---|---|---|---|---|
(missing) | 2 | 12.50 | 2 | 12.50 |
A | 6 | 37.50 | 8 | 50.00 |
B | 3 | 18.75 | 11 | 68.75 |
C | 5 | 31.25 | 16 | 100.00 |
*----NUMERIC VARIABLES ---*
Obs | _NAME_ | NMISS1 | PMISS |
---|---|---|---|
1 | VAR1_NMiss | 5 | 31.25 |
2 | VAR2_NMiss | 5 | 31.25 |
3 | VAR3_NMiss | 2 | 12.50 |
No comments:
Post a Comment