SAS Project Training 1
[The Code]
[The Code]
Below is all the SAS code used in this project:
libname a '/folders/myfolders';
proc contents data=a.online_retail;
run;
run;
proc format;
value missing
. = 'missing'
other = 'present'
;
value $missing
' ' = 'missing'
other = 'present'
;
run;
value missing
. = 'missing'
other = 'present'
;
value $missing
' ' = 'missing'
other = 'present'
;
run;
proc freq data=a.online_retail;
tables _all_ /missing;
format _numeric_ missing. _character_ $missing.;
run;
tables _all_ /missing;
format _numeric_ missing. _character_ $missing.;
run;
proc freq data=a.online_retail;
tables country;
run;
tables country;
run;
proc means data=a.online_retail min max mean median mode maxdec=2;
var unitprice quantity;
run;
var unitprice quantity;
run;
proc sql;
select *
from a.online_retail
where unitprice <0;
select *
from a.online_retail
where quantity <0;
quit;
select *
from a.online_retail
where unitprice <0;
select *
from a.online_retail
where quantity <0;
quit;
proc freq data=a.online_retail;
tables country;
where unitprice >0 and quantity >0;
run;
tables country;
where unitprice >0 and quantity >0;
run;
proc compare
base = a.online_retail
compare = a.france_sales_201110
listvar
novalues;
run;
base = a.online_retail
compare = a.france_sales_201110
listvar
novalues;
run;
data france_sales_201110_clean;
set a.france_sales_201110(rename=(customerID=CustomerID_char));
CustomerID=input(CustomerID_char,best32.);
format customerid best12.;
invoice_date=mdy(invoice_mon,invoice_day,invoice_year);
invoicedate=dhms(invoice_date,0,0,invoice_time);
format invoicedate datetime.;
drop CustomerID_char invoice_date invoice_mon invoice_day invoice_year invoice_time;
run;
set a.france_sales_201110(rename=(customerID=CustomerID_char));
CustomerID=input(CustomerID_char,best32.);
format customerid best12.;
invoice_date=mdy(invoice_mon,invoice_day,invoice_year);
invoicedate=dhms(invoice_date,0,0,invoice_time);
format invoicedate datetime.;
drop CustomerID_char invoice_date invoice_mon invoice_day invoice_year invoice_time;
run;
proc contents data=france_sales_201110_clean;
run;
run;
proc compare
base = a.online_retail
compare = france_sales_201110_clean
listvar
novalues;
run;
base = a.online_retail
compare = france_sales_201110_clean
listvar
novalues;
run;
data online_retail_new;
set a.online_retail france_sales_201110_clean;
run;
set a.online_retail france_sales_201110_clean;
run;
proc sort data=a.germany_sales_201110 out=germany_201110_sort;
by stockcode;
run;
proc sort data=a.germany_products out=germany_sort;
by stockcode;
run;
data germany_201110_combine;
length match_flag $20;
merge germany_201110_sort(in=a) germany_sort(in=b);
by stockcode;
if a and b then match_flag='in both';
else if a then match_flag='sales only';
else if b then match_flag='product list only';
run;
by stockcode;
run;
proc sort data=a.germany_products out=germany_sort;
by stockcode;
run;
data germany_201110_combine;
length match_flag $20;
merge germany_201110_sort(in=a) germany_sort(in=b);
by stockcode;
if a and b then match_flag='in both';
else if a then match_flag='sales only';
else if b then match_flag='product list only';
run;
proc freq data=germany_201110_combine;
tables match_flag;
run;
tables match_flag;
run;
data germany_201110_clean;
set germany_201110_combine;
where match_flag='in both';
drop match_flag;
run;
set germany_201110_combine;
where match_flag='in both';
drop match_flag;
run;
proc compare
base = a.online_retail
compare = germany_201110_clean
listvar
novalues;
run;
base = a.online_retail
compare = germany_201110_clean
listvar
novalues;
run;
data online_retail_new;
set a.online_retail germany_201110_clean;
run;
set a.online_retail germany_201110_clean;
run;
data online_retail_master;
set a.online_retail;
run;
set a.online_retail;
run;
%macro load(
data=,
country=
);
proc sql noprint;
select count(*) into :initialcount from online_retail_master
where country="&country."
;
quit;
%if &country. = France %then %do;
data &data._clean;
set &data.(rename=(customerID=CustomerID_char));
CustomerID=input(CustomerID_char,best32.);
format customerid best12.;
invoice_date=mdy(invoice_mon,invoice_day,invoice_year);
invoicedate=dhms(invoice_date,0,0,invoice_time);
format invoicedate datetime.;
drop CustomerID_char invoice_date invoice_mon invoice_day invoice_year invoice_time;
run;
%end;
%if &country. = Germany %then %do;
proc sort data=&data. out=sales_sort;
by stockcode;
run;
proc sort data=a.germany_products out=germany_sort;
by stockcode;
run;
data combine;
length match_flag $20;
merge sales_sort(in=a) germany_sort(in=b);
by stockcode;
if a and b then match_flag='in both';
else if a then match_flag='sales only';
else if b then match_flag='product list only';
run;
proc freq data=combine;
tables match_flag;
run;
data &data._clean;
set combine;
where match_flag='in both';
drop match_flag;
run;
%end;
proc append base=online_retail_master data=&data._clean;
run;
proc sql noprint;
select count(*) into :newcount from online_retail_master
where country="&country."
;
quit;
%put # of &Country records before load: &initialcount.;
%put # of &Country records after load: &newcount.;
%mend load;
data=,
country=
);
proc sql noprint;
select count(*) into :initialcount from online_retail_master
where country="&country."
;
quit;
%if &country. = France %then %do;
data &data._clean;
set &data.(rename=(customerID=CustomerID_char));
CustomerID=input(CustomerID_char,best32.);
format customerid best12.;
invoice_date=mdy(invoice_mon,invoice_day,invoice_year);
invoicedate=dhms(invoice_date,0,0,invoice_time);
format invoicedate datetime.;
drop CustomerID_char invoice_date invoice_mon invoice_day invoice_year invoice_time;
run;
%end;
%if &country. = Germany %then %do;
proc sort data=&data. out=sales_sort;
by stockcode;
run;
proc sort data=a.germany_products out=germany_sort;
by stockcode;
run;
data combine;
length match_flag $20;
merge sales_sort(in=a) germany_sort(in=b);
by stockcode;
if a and b then match_flag='in both';
else if a then match_flag='sales only';
else if b then match_flag='product list only';
run;
proc freq data=combine;
tables match_flag;
run;
data &data._clean;
set combine;
where match_flag='in both';
drop match_flag;
run;
%end;
proc append base=online_retail_master data=&data._clean;
run;
proc sql noprint;
select count(*) into :newcount from online_retail_master
where country="&country."
;
quit;
%put # of &Country records before load: &initialcount.;
%put # of &Country records after load: &newcount.;
%mend load;
%load(data=a.france_sales_201110,country=France);
%load(data=a.germany_sales_201110,country=Germany);