forked from garrettpedersen/spring1_orangehw9
-
Notifications
You must be signed in to change notification settings - Fork 0
/
DOE_HW1_update_1
103 lines (89 loc) · 2.76 KB
/
DOE_HW1_update_1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
/*Design of Experiments: Homework 1*/
/*IMPORT DATA*/
libname zipline '\\vmware-host\Shared Folders\Desktop\SAS Programming\SAS Datasets';

/* Build WORK.ZIP from ZIPLINE.RDUCH (VAR1 dropped). For each record this step
   derives:
     distloc1-distloc5 : straight-line distance, in coordinate units of
                         long/lat, to each of five fixed locations
     closest_dist      : the smallest of those five distances
     closest_loc       : label of the location that attains closest_dist
     distance_class    : closest_dist binned into five ordered classes
     weight            : inverse-distance weight for sampling             */
data zip;
    set zipline.rduch (drop=VAR1);

    /*Creating distance variables to each location*/
    /* long - (-x) is written out so the location's longitude stays visible */
    distloc1 = sqrt((long - (-78.878130))**2 + (lat - 35.89314)**2);
    distloc2 = sqrt((long - (-78.875880))**2 + (lat - 35.74628)**2);
    distloc3 = sqrt((long - (-78.676540))**2 + (lat - 35.7724)**2);
    distloc4 = sqrt((long - (-79.054280))**2 + (lat - 35.90535)**2);
    distloc5 = sqrt((long - (-78.575981))**2 + (lat - 35.86696)**2);

    /*Find minimum distance*/
    closest_dist = min(distloc1, distloc2, distloc3, distloc4, distloc5);

    /*Create variable closest_loc to assign the closest location*/
    /* Ties resolve to the lowest-numbered location because the chain stops
       at the first match. */
    length closest_loc $10;
    if      closest_dist = distloc1 then closest_loc = 'Location 1';
    else if closest_dist = distloc2 then closest_loc = 'Location 2';
    else if closest_dist = distloc3 then closest_loc = 'Location 3';
    else if closest_dist = distloc4 then closest_loc = 'Location 4';
    else                                 closest_loc = 'Location 5';

    /*Create distance class - these do not need to be equally spaced*/
    /* Explicit length: without it the variable would take length 7 from the
       first assignment ('Closest') and 'Farthest' would be truncated. */
    length distance_class $8;
    /* Each branch only needs the upper bound; the lower bound is implied by
       the preceding ELSE. Bins: [.,0.05) [0.05,0.1) [0.1,0.15) [0.15,0.2)
       [0.2,inf).
       NOTE(review): a missing closest_dist compares lower than any number in
       SAS and therefore lands in 'Closest' - confirm lat/long are never
       missing in the input. */
    if      closest_dist lt 0.05 then distance_class = 'Closest';
    else if closest_dist lt 0.1  then distance_class = 'Close';
    else if closest_dist lt 0.15 then distance_class = 'Medium';
    else if closest_dist lt 0.2  then distance_class = 'Far';
    else                              distance_class = 'Farthest';

    /*Create weight variable for sampling*/
    /* Guard the division: a record located exactly on a location (distance 0)
       or with a missing distance gets a missing weight instead of raising a
       division-by-zero error in the log. */
    if closest_dist gt 0 then weight = 1 / closest_dist;
    else weight = .;
run;
/*DATA EXPLORATION*/

/* Distribution of the distance to the nearest location, with a fitted
   normal curve overlaid on the histogram. */
proc univariate data=zip;
    var closest_dist;
    histogram closest_dist / normal(color=red);
run;

/* Distributions of the raw coordinates; one histogram per variable, each
   with its own fitted normal curve. */
proc univariate data=zip;
    var lat long;
    histogram lat  / normal(color=red);
    histogram long / normal(color=blue);
run;
/* Summary statistics of closest_dist within each closest_loc group.
   PROC MEANS ends at RUN; no QUIT is needed because it is not a
   run-group procedure. */
proc means data=zip;
    class closest_loc;
    var closest_dist;
run;
/* Eyeball the first ten observations of the derived data set. */
proc print data=zip (obs=10);
run;
/* Bar chart: share of records whose nearest location is each of the five
   locations. The title is set for this plot only and cleared afterwards. */
title 'Closest Locations by Percent';
proc sgplot data=zip;
    vbar closest_loc / stat=percent dataskin=crisp;
    xaxis display=(nolabel noticks);
    yaxis grid;
run;
title;
/*Should we aim to get representative people by location?*/
/*PROC POWER;
ONESAMPLEMEANS TEST=t
NULLMEAN=200
MEAN=225
STDDEV=20
ALPHA=0.05
POWER=0.9
NTOTAL=.;
RUN;*/
/*-------------------------------------------------------------------*/
/*Simple Random Sample*/
/* Draw 8000 records from ZIP without replacement into WORK.SRS.
   A fixed seed (the same one used by the stratified samples in this file)
   makes the draw reproducible from run to run. */
title 'Simple Random Sample';
proc surveyselect data=zip
    method=srs n=8000
    seed=8675309 out=SRS;
run;
/* Clear the title so it does not carry over onto later procedure output. */
title;
/*-------------------------------------------------------------------*/
/*Stratified Random Sample - Location*/
/*Sort into Location*/
/* The STRATA statement requires the input to be sorted by the strata
   variable. */
proc sort data=zip;
    by closest_loc;
run;

/*Sample*/
/* Equal allocation: 1600 records drawn without replacement from each
   closest_loc stratum into WORK.STRATASAMPLE. An explicit title is set here
   so the output is not labelled with whatever title an earlier step left
   active. */
title 'Stratified Random Sample by Location';
proc surveyselect data=zip
    method=srs n=1600
    seed=8675309 out=StrataSample;
    strata closest_loc;
run;
title;
/*-------------------------------------------------------------------*/
/*Stratified by Location Proportionally*/
/* Draw 8000 records in total into WORK.STRAT_SAMP_PROP, split across the
   closest_loc strata in proportion to each stratum's size (ALLOC=PROP).
   ZIP must already be sorted by closest_loc; the PROC SORT earlier in this
   file does that. An explicit title is set here so the output is not
   labelled with whatever title an earlier step left active. */
title 'Stratified Random Sample by Location (Proportional Allocation)';
proc surveyselect data=zip
    out=strat_samp_prop
    seed=8675309
    method=srs
    sampsize=8000;
    strata closest_loc / alloc=prop;
run;
title;