Friday, May 6, 2011

SAS Code for Missing Data Analysis

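The following code simulates a small data set containing missing values, then reports the missing values for both its character and its numeric variables. To use it on your own data, replace the simulated data set with yours and list your numeric variables of interest in the PROC MEANS step.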

/*-------------------------------------------------------*
 |  BUILD A SMALL DEMO DATA SET THAT CONTAINS MISSING VALUES
 |
 |  Y AND VAR1-VAR3 ARE READ AS NUMERIC; VAR4 AND VAR5 ARE
 |  READ AS CHARACTER.  A LONE PERIOD IN A DATA LINE IS A
 |  MISSING VALUE FOR EITHER TYPE.
 *-------------------------------------------------------*/

DATA MYDATA;
   /* LIST INPUT: VALUES ARE SEPARATED BY BLANKS */
   INPUT Y
         VAR1-VAR3
         VAR4 $
         VAR5 $;
   DATALINES;
1      10     33     4  M  A
0      .      21     3  .  B
1      30     .      2  M  C
1      20     76     1  F  A
0      .      24     3  F  .
0      20     22     .  M  A
1      10     .      2  .  A
1      .      49     2  F  C
0      30     .      2  F  B
1      20     59     2  M  B
1      20     76     1  .  A
0      .      24     3  F  C
0      20     22     .  F  C
1      10     .      2  M  .
1      .      49     2  M  C
0      30     .      2  F  A
;
RUN;

/*-------------------------------------------------------*
 | CHARACTER VARIABLES: ONE-WAY FREQUENCY TABLE FOR EVERY
 | CHARACTER VARIABLE IN MYDATA.  THE MISSING OPTION MAKES
 | MISSING VALUES A LEVEL OF THEIR OWN, SO THEY ARE COUNTED
 | IN THE FREQUENCIES AND PERCENTS.
 *-------------------------------------------------------*/

PROC FREQ DATA=MYDATA;
   TABLE _CHARACTER_ / MISSING;
RUN;

/*-------------------------------------------------------*
 | ANALYZE AND REPORT MISSING NUMERIC VARIABLES
 |
 | RESULT: DATA SET S2, ONE ROW PER ANALYSIS VARIABLE
 |   _NAME_  = <VARIABLE>_NMiss (NAME GENERATED BY AUTONAME)
 |   NMISS1  = NUMBER OF MISSING VALUES
 |   PMISS   = PERCENT MISSING = NMISS1 / TOTAL ROWS * 100
 *-------------------------------------------------------*/

/* COUNT THE ROWS OF MYDATA SO THE PERCENT-MISSING DENOMINATOR
   FOLLOWS THE DATA INSTEAD OF BEING TYPED IN BY HAND.
   CREATES THE MACRO VARIABLE TOTAL_N. */
PROC SQL NOPRINT;
SELECT COUNT(*) INTO :TOTAL_N FROM MYDATA;
QUIT;

PROC MEANS DATA = MYDATA NMISS;
VAR  VAR1 VAR2 VAR3 ; /* ENTER NUMERIC VARIABLES OF INTEREST*/
OUTPUT OUT=T (DROP=_TYPE_ _FREQ_) NMISS=/AUTONAME;
RUN;

/* T HOLDS A SINGLE ROW OF COUNTS; FLIP IT TO ONE ROW PER VARIABLE.
   PREFIX=NMISS NAMES THE TRANSPOSED COLUMN NMISS1. */
PROC TRANSPOSE DATA = T PREFIX=NMISS OUT=S1;
VAR   _NUMERIC_;
RUN;

DATA S2;
SET S1;
/* DENOMINATOR = TOTAL N IN DATA SET, TAKEN FROM TOTAL_N.
   THE GUARD LEAVES PMISS MISSING RATHER THAN DIVIDING BY ZERO
   WHEN MYDATA HAS NO ROWS. */
IF &TOTAL_N > 0 THEN PMISS = NMISS1/&TOTAL_N*100;
RUN;

PROC PRINT DATA = S2;
RUN;


(output below)

*--CHARACTER VARIABLES--*


VAR4   Frequency   Percent   Cumulative Frequency   Cumulative Percent
               3     18.75                      3                18.75
F              7     43.75                     10                62.50
M              6     37.50                     16               100.00

VAR5   Frequency   Percent   Cumulative Frequency   Cumulative Percent
               2     12.50                      2                12.50
A              6     37.50                      8                50.00
B              3     18.75                     11                68.75
C              5     31.25                     16               100.00


 *----NUMERIC VARIABLES ---*

Obs   _NAME_       NMISS1   PMISS
1     VAR1_NMiss        5   31.25
2     VAR2_NMiss        5   31.25
3     VAR3_NMiss        2   12.50

No comments:

Post a Comment