
/*******************************************************************\
| Title: Simple Random Sample Without Replacement |
| Goal : To select a random sample where no observation can be |
| chosen more than once. |
| Note : Method 1 below uses PROC SURVEYSELECT which is part of the |
| SAS/STAT package in Version 7 and above. If you have |
| Version 6 of SAS, you are limited to Methods 2 and 3 |
\*******************************************************************/
/* SAMPLE DATA
   Simulated grade point averages for East High School students in
   grades 9 through 12. Each grade receives between 100 and 300
   students, and each GPA is drawn uniformly between 2.0 and 4.1. */
data EastHigh(drop=ClassSize);
   /* Declared ahead of the loops so GPA remains the first column. */
   format GPA 3.1;
   do Grade = 9 to 12;
      /* Draw the size of this grade once, before looping over its
         students. ClassSize is a work variable only and is dropped. */
      ClassSize = 100 + int(201 * ranuni(432098));
      do StudentID = 1 to ClassSize;
         GPA = 2.0 + (2.1 * ranuni(34280));
         output;
      end;
   end;
run;
* Method 1, using PROC SURVEYSELECT;
/* Simple random sample of 50 students. METHOD=SRS requests simple
   random sampling without replacement, N= is the number of
   observations to select, and the sample is stored in the OUT= data
   set, SAMPLE. SEED= fixes the starting point of the random number
   generator so the same 50 students are selected on every run, in
   line with the fixed seeds used by Methods 2 and 3; without it the
   procedure seeds itself from the system clock. */
proc surveyselect data=EastHigh method=srs n=50 seed=1234 out=sample;
run;
proc print data=sample;
run;
* Method 2, Use Base SAS if you do not have SAS/STAT Version 7 or higher;
/* Simple Random Sample of 50 students. */
* Add a new variable X containing random numbers between 0 and 1;
data random;
   set EastHigh;
   x = ranuni(1234);
run;
* Randomly sort the data set;
proc sort data=random;
   by x;
run;
* Keep the first n observations. Since the data points are randomly sorted,
  these observations constitute a simple random sample;
/* DROP= removes the helper sort key X from the final sample. */
data sample(drop=x);
   set random(obs=50);
run;
proc print data=sample;
run;
* Method 3, Using SAS DATA Step and no sort is required;
/* Simple Random Sample of 50 students, drawn in one sequential pass
   over the data set. */
data sample(drop=StillNeeded LeftToScan);
   /* StillNeeded starts at the number of sample obs wanted.
      LeftToScan is loaded on the first iteration with the total
      number of obs in the data set, supplied by NOBS= on the SET
      statement below. */
   retain StillNeeded 50 LeftToScan;
   if _n_ = 1 then LeftToScan = total;
   set EastHigh nobs=total;
   /* Each obs is selected with probability
      (sample obs still needed) / (obs not yet examined).
      For the first obs this is simply 50/total, the equal chance
      every obs has of being in the sample. When an obs is selected
      it is written out and the number still needed goes down by one. */
   if ranuni(1230498) <= StillNeeded / LeftToScan then do;
      output;
      StillNeeded = StillNeeded - 1;
   end;
   /* Selected or not, this obs has now been examined, so one fewer
      obs is left to sample from. */
   LeftToScan = LeftToScan - 1;
   /* Once the desired number of sample points has been taken, stop
      iterating. */
   if StillNeeded = 0 then stop;
run;
/* End of sampling examples */