ANCOVA of the rattlesnake data used in Chapter 7, and randomization tests based on F-ratios and sums of squares. The data are stored in the file 'bscut2', which contains 54 observations and four variables: SVL (snout-vent length of each snake), SEX (sex of each individual), SITE (location where each individual was captured), and AGE (number of buttons on the rattle, used as a proxy for age). The model is a two-way ANCOVA: SVL is the dependent variable, SEX and SITE are fixed effects, and AGE is the covariate. Counts that are produced by the final PROC MEANS statements should be divided by 1001 to give the correct probability levels.

/* Fit the two-way ANCOVA to the original data. The Type III F-ratios and   */
/* sums of squares reported by this run are the critical cut-off values     */
/* that are hard-coded in the DATA step that builds temp6 and temp7.        */
PROC GLM DATA=bscut2;
  CLASS SEX SITE;                            /* SEX and SITE are classification effects; AGE is left continuous */
  MODEL SVL = SEX SITE SEX*SITE AGE / SS3;   /* SS3 requests Type III sums of squares */
RUN;
QUIT;

/* Write the text file data1.txt with one line per (iteration, observation) */
/* pair: c is the iteration-cycle identifier and z is a uniform random      */
/* number. 1000 randomizations x 54 observations in the original data file  */
/* = 54,000 lines.                                                          */
DATA _NULL_;                 /* _NULL_ runs the step without creating a SAS data set */
  FILE 'c:\data1.txt';       /* all PUT output of this step goes to the text file */
  DO C = 1 TO 1000;          /* iteration (randomization) identifier */
    DO I = 1 TO 54;          /* one line for each observation of the original data */
      Z = UNIFORM(12876);    /* 12876 is the fixed seed, so reruns reproduce the same numbers */
      PUT C Z;               /* list output: c, a blank, then z */
    END;
  END;
RUN;

/* Read the text file back in as SAS data set temp1: each line supplies the */
/* iteration identifier (c) and the random number (z).                      */
DATA TEMP1;
  INFILE 'c:\data1.txt';
  INPUT C Z;
RUN;

/* Replace each random number z by its rank within its own iteration (BY c). */
/* Rankings range from 1 to 54, the number of observations per iteration.    */
/* The ranked data are stored in temp2.                                       */
PROC RANK DATA=TEMP1 OUT=TEMP2;
  BY C;
  VAR Z;
RUN;

/* Build temp3: 1000 copies of every observation in the original data, each */
/* copy tagged with an iteration identifier c = 1..1000.                    */
DATA TEMP3;
  SET bscut2;
  DO C = 1 TO 1000;
    OUTPUT;                    /* one copy of the current observation per iteration */
  END;
  KEEP C SITE SEX AGE SVL;     /* only these variables are written to temp3 */
RUN;

/* Group the 1000 copies in temp3 by iteration identifier (c) so that temp3 */
/* can be merged BY c in the following steps.                               */
PROC SORT DATA=TEMP3 OUT=TEMP3;
  BY C;
RUN;

/* Merge temp2 (ranks in random order) with temp3 (1000 copies of the       */
/* original data) by iteration identifier c. The KEEP= option drops SVL and */
/* AGE during the merge, so temp4 holds only the iteration id, the rankings */
/* and the class variables SEX and SITE.                                    */
DATA TEMP4;
  MERGE TEMP2
        TEMP3 (KEEP=C SEX SITE);
  BY C;                        /* BY must come after the MERGE statement it applies to */
RUN;

/* Order temp4 by iteration identifier and then by ranking. Because the     */
/* rankings come from random numbers, this puts the class variables (SEX    */
/* and SITE) into a random order within each iteration.                     */
PROC SORT DATA=TEMP4 OUT=TEMP4;
  BY C Z;
RUN;

/* Merge temp4 (class variables in random order) with temp3 (1000 copies of */
/* the original data in original order) by iteration identifier c. The      */
/* result, temp5, contains 1000 sets of randomized observations: SVL and    */
/* AGE from temp3 paired with SEX and SITE from temp4.                      */
DATA TEMP5;
  MERGE TEMP4 (KEEP=SEX SITE C)
        TEMP3 (KEEP=SVL AGE C);
  BY C;                        /* BY must come after the MERGE statement it applies to */
RUN;

/* Run the ANCOVA once per randomization (BY c) on temp5. The results have  */
/* to be captured in a data set, which OUTSTAT= names (allstats); NOPRINT   */
/* suppresses the printed output.                                           */
PROC GLM DATA=TEMP5 NOPRINT OUTSTAT=ALLSTATS;
  BY C;
  CLASS SEX SITE;
  MODEL SVL = SEX SITE SEX*SITE AGE / SS3 NOUNI;
RUN;
QUIT;

/* From the GLM output (allstats) keep the iterations whose F-ratio or sum  */
/* of squares is greater than or equal to the critical cut-off value for    */
/* the same source. The cut-off values are taken from the analysis of the   */
/* original data (see the output of the first PROC GLM run) and must be     */
/* updated if the data change.                                              */
/*   temp6 receives the rows that pass an F-ratio cut-off;                  */
/*   temp7 receives the rows that pass a sums-of-squares cut-off.           */
DATA TEMP6 TEMP7;
  SET ALLSTATS;

  IF _SOURCE_ = 'ERROR' THEN DELETE;     /* error rows are not tested */

  /* F-ratio cut-offs -> temp6 */
  IF _SOURCE_ = 'SEX'      AND F >= 11.53  THEN OUTPUT TEMP6;
  IF _SOURCE_ = 'SITE'     AND F >= 4.63   THEN OUTPUT TEMP6;
  IF _SOURCE_ = 'SEX*SITE' AND F >= 2.59   THEN OUTPUT TEMP6;
  IF _SOURCE_ = 'AGE'      AND F >= 111.06 THEN OUTPUT TEMP6;

  /* Sums-of-squares cut-offs -> temp7 */
  IF _SOURCE_ = 'SEX'      AND SS >= 218.126    THEN OUTPUT TEMP7;
  IF _SOURCE_ = 'SITE'     AND SS >= 87.60763   THEN OUTPUT TEMP7;
  IF _SOURCE_ = 'SEX*SITE' AND SS >= 48.98221   THEN OUTPUT TEMP7;
  IF _SOURCE_ = 'AGE'      AND SS >= 2101.07836 THEN OUTPUT TEMP7;

  KEEP _SOURCE_ F SS;
RUN;

/* Order temp6 by "_source_" ahead of the PROC MEANS step that counts its rows. */
PROC SORT DATA=TEMP6 OUT=TEMP6;
  BY _SOURCE_;
RUN;

/* Count the number of cases in which the F-ratio was equal to or greater   */
/* than the critical cut-off value. Option N reports the number of cases;   */
/* the CLASS statement produces subtotals by "_source_" (i.e. AGE, SEX,     */
/* etc.).                                                                   */
PROC MEANS DATA=TEMP6 N;       /* temp6 holds the rows that passed an F-ratio cut-off */
  CLASS _SOURCE_;
  VAR F;                       /* F-ratios are stored as variable f */
RUN;

/* Order temp7 by "_source_" ahead of the PROC MEANS step that counts its rows. */
PROC SORT DATA=TEMP7 OUT=TEMP7;
  BY _SOURCE_;
RUN;

/* Count the number of cases in which the sum of squares was equal to or    */
/* greater than the critical cut-off value. Option N reports the number of  */
/* cases; the CLASS statement produces subtotals by "_source_".             */
PROC MEANS DATA=TEMP7 N;       /* name temp7 explicitly rather than relying on the last data set created */
  CLASS _SOURCE_;
  VAR SS;                      /* sums of squares are stored as variable ss */
RUN;                           /* final step needs an explicit RUN to execute in an interactive session */