* BASIC PROGRAM TO USE PRINCIPAL COMPONENTS ANALYSIS TO IDENTIFY DIETARY PATTERNS.

  SPECIFICATIONS FOR INPUT DATASET (DATASET1):
    ONE OBSERVATION PER PERSON (ID = IDENTIFYING VARIABLE)
    ONE VARIABLE PER FOOD (FOOD1 ... FOOD100)
    GENERALLY, EACH FOOD VARIABLE IS IN FREQUENCY FORMAT, E.G., SVGS/MONTH
;

libname in 'c:\';

* PRINCIPAL COMPONENTS ANALYSIS

  NOTE THAT DATASET1A INCLUDES ONLY THE VARIABLES TO BE INCLUDED IN THE
  PRINCIPAL COMPONENTS ANALYSIS (ID IS DROPPED).

  FOR DESCRIPTION OF OPTIONS, SUCH AS TYPE OF ROTATION, SEE SAS/STAT MANUAL,
  OR HATCHER, LARRY, A STEP-BY-STEP APPROACH TO USING THE SAS SYSTEM FOR
  FACTOR ANALYSIS AND STRUCTURAL EQUATION MODELING, CARY, NC: SAS INSTITUTE
  INC., 1994
;

data DATASET1A;
  set DATASET1 (drop=ID);
run;

* PEARSON CORRELATION MATRIX OF ALL FOOD VARIABLES, WRITTEN TO CORREL;
proc corr data=DATASET1A outp=CORREL noprint;
run;

* EXPLORATORY RUN - MINEIGEN REQUIRES A VALUE. MINEIGEN=1 RETAINS COMPONENTS
  WITH EIGENVALUE OF AT LEAST 1 AND PRODUCES THE SCREE PLOT USED BELOW TO
  CHOOSE THE NUMBER OF COMPONENTS
;
proc factor data=CORREL
            method=prin
            mineigen=1
            scree
            rotate=varimax
            reorder;
run;

* NUMBER OF COMPONENTS TO RETAIN BASED ON SCREE PLOTS, INTERPRETABILITY, AND
  OTHER CRITERIA - SEE, FOR EXAMPLE, DEVELLIS, ROBERT F., SCALE DEVELOPMENT:
  THEORY AND APPLICATIONS, NEWBURY PARK, CA: SAGE PUBLICATIONS, INC., 1991.

  IN THIS EXAMPLE, RETAIN 2 COMPONENTS
;

* CORREL IS RECOMPUTED HERE SO THAT THIS SECTION CAN BE RUN ON ITS OWN;
proc corr data=DATASET1A outp=CORREL noprint;
run;

proc factor data=CORREL
            method=prin
            scree
            nfact=2
            rotate=varimax
            reorder;
run;

* SELECT ITEMS POTENTIALLY ASSOCIATED WITH EACH PATTERN, AND STANDARDIZE TO
  ACCOUNT FOR DIFFERENT RANGES OF INTAKE.

  FOR EXAMPLE, IF FOODS 1-15 (FOOD1-15) ARE ASSOCIATED WITH PATTERN 1, AND
  FOODS 16-25 (FOOD16-25) ARE ASSOCIATED WITH PATTERN 2 ...
;

* COPY EACH SELECTED FOOD INTO A _S VARIABLE SO THAT THE ORIGINAL INTAKE
  VARIABLES ARE LEFT UNTOUCHED BY THE STANDARDIZATION;
data DATASET2;
  set DATASET1;

  * PATTERN 1;
  FOOD1_S  = FOOD1;
  FOOD2_S  = FOOD2;
  FOOD3_S  = FOOD3;
  FOOD4_S  = FOOD4;
  FOOD5_S  = FOOD5;
  FOOD6_S  = FOOD6;
  FOOD7_S  = FOOD7;
  FOOD8_S  = FOOD8;
  FOOD9_S  = FOOD9;
  FOOD10_S = FOOD10;
  FOOD11_S = FOOD11;
  FOOD12_S = FOOD12;
  FOOD13_S = FOOD13;
  FOOD14_S = FOOD14;
  FOOD15_S = FOOD15;

  * PATTERN 2;
  FOOD16_S = FOOD16;
  FOOD17_S = FOOD17;
  FOOD18_S = FOOD18;
  FOOD19_S = FOOD19;
  FOOD20_S = FOOD20;
  FOOD21_S = FOOD21;
  FOOD22_S = FOOD22;
  FOOD23_S = FOOD23;
  FOOD24_S = FOOD24;
  FOOD25_S = FOOD25;
run;

* TO REVERSE SCORING FOR FOOD ITEMS WITH STRONG NEGATIVE LOADINGS
  E.G., IF FOOD14 HAD A STRONG NEGATIVE LOADING ON PATTERN 1;
data DATASET2;
  set DATASET2;
  FOOD14_SR = FOOD14_S * -1;
run;

* STANDARDIZE EVERY ITEM TO MEAN 0 AND STD 1.
  FOOD14_SR MUST BE IN THE VAR LIST - OTHERWISE THE REVERSED ITEM WOULD ENTER
  THE PATTERN SCORE ON ITS RAW SCALE WHILE ALL OTHER ITEMS ARE Z-SCORES
;
proc standard data=DATASET2 out=DATASET3 mean=0 std=1;
  var FOOD1_S  FOOD2_S  FOOD3_S  FOOD4_S  FOOD5_S
      FOOD6_S  FOOD7_S  FOOD8_S  FOOD9_S  FOOD10_S
      FOOD11_S FOOD12_S FOOD13_S FOOD14_S FOOD14_SR FOOD15_S
      FOOD16_S FOOD17_S FOOD18_S FOOD19_S FOOD20_S
      FOOD21_S FOOD22_S FOOD23_S FOOD24_S FOOD25_S;
run;

* CALCULATE PATTERN SCORES AS UNWEIGHTED SUM OF STANDARDIZED INTAKE
  FREQUENCIES.

  TWO METHODS ARE PRESENTED BELOW - ONE ASSUMING NO MISSING DATA, AND SECOND
  ALLOWING FOR SOME IMPUTATION OF MISSING VALUES FOR INDIVIDUAL FOODS WITHIN
  A PATTERN.

  BOTH METHODS WRITE DATASET4 - RUN ONLY THE ONE YOU NEED. IF THE WHOLE
  PROGRAM IS SUBMITTED, THE METHOD 2 RESULT REPLACES THE METHOD 1 RESULT
;

* METHOD 1 (NO MISSING DATA)
  THE + OPERATOR RETURNS MISSING IF ANY ITEM IS MISSING, SO A PERSON WITH ANY
  MISSING ITEM IN A PATTERN GETS A MISSING SCORE FOR THAT PATTERN
;
data DATASET4;
  set DATASET3 (keep=ID
                     FOOD1_S  FOOD2_S  FOOD3_S  FOOD4_S   FOOD5_S
                     FOOD6_S  FOOD7_S  FOOD8_S  FOOD9_S   FOOD10_S
                     FOOD11_S FOOD12_S FOOD13_S FOOD14_SR FOOD15_S
                     FOOD16_S FOOD17_S FOOD18_S FOOD19_S  FOOD20_S
                     FOOD21_S FOOD22_S FOOD23_S FOOD24_S  FOOD25_S);

  PATTERN1 = FOOD1_S  + FOOD2_S  + FOOD3_S  + FOOD4_S   + FOOD5_S
           + FOOD6_S  + FOOD7_S  + FOOD8_S  + FOOD9_S   + FOOD10_S
           + FOOD11_S + FOOD12_S + FOOD13_S + FOOD14_SR + FOOD15_S;

  PATTERN2 = FOOD16_S + FOOD17_S + FOOD18_S + FOOD19_S + FOOD20_S
           + FOOD21_S + FOOD22_S + FOOD23_S + FOOD24_S + FOOD25_S;
run;

* METHOD 2 (TO ACCOUNT FOR MISSING DATA)
  THIS METHOD IMPUTES MISSING VALUES FOR FOODS WITHIN A PATTERN AS THE
  AVERAGE STANDARDIZED INTAKE FOR OTHER NON-MISSING FOODS WITHIN THAT
  PATTERN, BUT ONLY IF MOST (70-75%) OF THE FOOD ITEMS IN THE PATTERN ARE
  NON-MISSING
;
data DATASET4;
  set DATASET3 (keep=ID
                     FOOD1_S  FOOD2_S  FOOD3_S  FOOD4_S   FOOD5_S
                     FOOD6_S  FOOD7_S  FOOD8_S  FOOD9_S   FOOD10_S
                     FOOD11_S FOOD12_S FOOD13_S FOOD14_SR FOOD15_S
                     FOOD16_S FOOD17_S FOOD18_S FOOD19_S  FOOD20_S
                     FOOD21_S FOOD22_S FOOD23_S FOOD24_S  FOOD25_S);

  * THE ARRAYS ONLY GROUP EXISTING VARIABLES - NO NEW VARIABLES ARE CREATED;
  array P1ITEMS{15} FOOD1_S  FOOD2_S  FOOD3_S  FOOD4_S   FOOD5_S
                    FOOD6_S  FOOD7_S  FOOD8_S  FOOD9_S   FOOD10_S
                    FOOD11_S FOOD12_S FOOD13_S FOOD14_SR FOOD15_S;
  array P2ITEMS{10} FOOD16_S FOOD17_S FOOD18_S FOOD19_S FOOD20_S
                    FOOD21_S FOOD22_S FOOD23_S FOOD24_S FOOD25_S;

  * COUNT NUMBER OF NON-MISSING FOOD ITEMS WITHIN THE PATTERN;
  COUNTP1 = n(of P1ITEMS{*});

  * CALCULATE PATTERN SCORE AS 15 TIMES THE MEAN OF THE NON-MISSING ITEMS,
    WHICH IS THE SUM WITH EACH MISSING ITEM REPLACED BY THAT MEAN.
    IF TOO MANY MISSING VALUES (FEWER THAN 11 OF 15 ITEMS PRESENT), SET SCORE
    TO MISSING. CHECKING THE COUNT FIRST ALSO AVOIDS DIVIDING BY ZERO WHEN
    EVERY ITEM IS MISSING
  ;
  if COUNTP1 >= 11 then PATTERN1 = 15 * (sum(of P1ITEMS{*}) / COUNTP1);
  else PATTERN1 = .;

  * REPEAT FOR SECOND DIETARY PATTERN (AT LEAST 7 OF 10 ITEMS REQUIRED);
  COUNTP2 = n(of P2ITEMS{*});

  if COUNTP2 >= 7 then PATTERN2 = 10 * (sum(of P2ITEMS{*}) / COUNTP2);
  else PATTERN2 = .;
run;