## Friday, May 6, 2011

### SAS Code for Missing Data Analysis

The following code reads your data set and reports the missing values for both character and numeric variables.

/*-------------------------------------------------------*
|  SIMULATE MISSING DATA
|
|  CREATES THE DATA SET MYDATA FROM 16 IN-LINE RECORDS.
|    Y, VAR1, VAR2, VAR3 : NUMERIC
|    VAR4, VAR5          : CHARACTER
|  WITH LIST INPUT, A LONE PERIOD (.) IN A DATA LINE IS READ
|  AS A MISSING VALUE FOR NUMERIC AND CHARACTER VARIABLES.
*-------------------------------------------------------*/

DATA MYDATA;
INPUT Y VAR1 VAR2 VAR3 VAR4 $ VAR5 $; /* $ MARKS THE PRECEDING VARIABLE AS CHARACTER */
CARDS;
1      10     33     4  M  A
0      .      21     3  .  B
1      30     .      2  M  C
1      20     76     1  F  A
0      .      24     3  F  .
0      20     22     .  M  A
1      10     .      2  .  A
1      .      49     2  F  C
0      30     .      2  F  B
1      20     59     2  M  B
1      20     76     1  .  A
0      .      24     3  F  C
0      20     22     .  F  C
1      10     .      2  M  .
1      .      49     2  M  C
0      30     .      2  F  A
;
RUN;

/*-------------------------------------------------------*
| CHARACTER VARIABLES: MISSING VALUE REPORT
|
| ONE FREQUENCY TABLE IS PRODUCED FOR EVERY CHARACTER
| VARIABLE IN MYDATA. THE MISSING OPTION MAKES MISSING
| VALUES APPEAR AS A LEVEL OF THEIR OWN, SO THEY ARE
| INCLUDED IN THE COUNTS AND PERCENTAGES.
*-------------------------------------------------------*/

proc freq data=mydata;
   tables _character_ / missing;
run;

/*-------------------------------------------------------*
| ANALYZE AND REPORT MISSING NUMERIC VARIABLES
|
| STEPS:
|   1. PROC MEANS COUNTS THE MISSING VALUES (NMISS) OF EACH
|      LISTED VARIABLE AND WRITES THEM TO DATA SET T.
|   2. PROC TRANSPOSE TURNS THE SINGLE ROW OF T INTO ONE ROW
|      PER VARIABLE (DATA SET S1, COUNT IN COLUMN NMISS1).
|   3. PROC SQL STORES THE NUMBER OF ROWS OF MYDATA IN THE
|      MACRO VARIABLE NTOTAL, SO THE DENOMINATOR NO LONGER
|      HAS TO BE EDITED BY HAND FOR EACH DATA SET.
|   4. DATA S2 ADDS PMISS = PERCENT OF ROWS THAT ARE MISSING.
|   5. PROC PRINT LISTS THE RESULT.
*-------------------------------------------------------*/

PROC MEANS DATA = MYDATA NMISS;
VAR  VAR1 VAR2 VAR3 ; /* ENTER NUMERIC VARIABLES OF INTEREST*/
/* AUTONAME NAMES EACH OUTPUT VARIABLE <VARIABLE>_NMiss */
OUTPUT OUT=T (DROP=_TYPE_ _FREQ_) NMISS=/AUTONAME;
RUN;

PROC TRANSPOSE DATA = T PREFIX=NMISS OUT=S1;
VAR   _NUMERIC_;
RUN;

/* TOTAL N IN DATA SET, USED AS THE DENOMINATOR OF PMISS */
PROC SQL NOPRINT;
SELECT COUNT(*) INTO :NTOTAL FROM MYDATA;
QUIT;

DATA S2;
SET S1;
/* PMISS STAYS MISSING WHEN MYDATA HAS NO ROWS (AVOIDS DIVISION BY ZERO) */
IF &NTOTAL > 0 THEN PMISS = NMISS1/&NTOTAL*100;
RUN;

PROC PRINT DATA = S2;
RUN;

(output below)

*--CHARACTER VARIABLES--*

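| VAR4 | Frequency | Percent | Cumulative Frequency | Cumulative Percent |
|------|-----------|---------|----------------------|--------------------|
| (missing) | 3 | 18.75 | 3 | 18.75 |
| F | 7 | 43.75 | 10 | 62.50 |
| M | 6 | 37.50 | 16 | 100.00 |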

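| VAR5 | Frequency | Percent | Cumulative Frequency | Cumulative Percent |
|------|-----------|---------|----------------------|--------------------|
| (missing) | 2 | 12.50 | 2 | 12.50 |
| A | 6 | 37.50 | 8 | 50.00 |
| B | 3 | 18.75 | 11 | 68.75 |
| C | 5 | 31.25 | 16 | 100.00 |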

*----NUMERIC VARIABLES ---*

| Obs | `_NAME_` | NMISS1 | PMISS |
|-----|----------|--------|-------|
| 1 | `VAR1_NMiss` | 5 | 31.25 |
| 2 | `VAR2_NMiss` | 5 | 31.25 |
| 3 | `VAR3_NMiss` | 2 | 12.50 |