/* Create the sales table and display Date as mm/dd/yyyy. */
data bookcode.total_sales;
   /* Modified list input (":" modifier): each blank-delimited field is read
      with its informat, whatever the spacing.  The earlier "+2" pointer skip
      assumed two blanks between the fields; with the single blank present in
      these records Amount started one column late and lost its leading
      digit (1,382 was read as 382). */
   input Date :mmddyy10. Amount :comma5.;
   format Date mmddyy10.;
   datalines;
09/05/2013 1,382
10/19/2013 1,235
11/30/2013 2,391
;
run;
/* Create the weight-club table and derive new variables from the two
   weight readings.
   Name is read with column input (columns 6-24), so the records below are
   aligned to that layout; with single-blank separation the Name field
   swallowed the Team and StartWeight values and the remaining variables
   were read from the wrong fields. */
data bookcode.weight_club;
   input IdNumber 1-4 Name $ 6-24 Team $ StartWeight EndWeight;
   MeanWeight     = Mean(StartWeight, EndWeight);
   WeightLoss     = StartWeight - EndWeight;
   LossPercentage = WeightLoss / StartWeight * 100;
   datalines;
1023 David Shaw          red    189 165
1049 Amelia Serrano      yellow 145 124
1219 Alan Nance          red    210 192
1246 Ravi Sinha          yellow 194 177
1078 Ashley McKnight     red    127 118
;
run;
/* List the weight-club table; TITLE sets the heading printed above it. */
title 'Weight Chart of PhD Students';
proc print data=bookcode.weight_club;
run;

/* Read comma-delimited records that contain missing values.  DSD makes two
   consecutive delimiters (and a lone ".") read as a missing value. */
data BookCode.club1;
   infile datalines delimiter=',' dsd;
   input IdNumber Name $ Team $ StartWeight EndWeight;
   datalines;
1023,David,red,189,165
1049,Amelia,yellow,145,,
1219,Alan,,210,192
1246,Ravi,yellow,194,177
1078,Ashley,red,127,118
1221,Jim,yellow,220,.
;
run;

title 'Weight of Club Members';
proc print data=Bookcode.club1;
run;
/* Build a small height/weight table from comma-delimited records, then run
   PROC UNIVARIATE on the two numeric variables. */
data BookCode.GAINS;
   infile datalines delimiter=',' dsd;
   input NAME $ HEIGHT WEIGHT;
   datalines;
ALFRED,69,122.5
ALICIA,56.5,84
BARBARA,65.3,98
BANNETT,63.2,96.2
CAROL,62.8,102.5
CARLOS,63.7,102.9
;
run;

proc univariate data=BOOKCODE.GAINS;
   var HEIGHT WEIGHT;
run;
/* ************************************************************************* */
/* Yearly index values per country, read from comma-delimited records.
   INFILE is placed before INPUT: the INFILE statement has to execute before
   the INPUT statement that reads the records, otherwise the first read is
   not guaranteed to use the DSD / DLM=',' options. */
data atifraza.cpi;
   infile datalines dsd dlm=',';
   input country $ Y2001 Y2002 Y2003;
   datalines;
PAK,100,111,120
BNG,105,115,125
SRI,99,107,115
IND,103,111,118
BHU,106,113,122
NPL,94,105,117
;
run;
/* Sort before transposing.  Any step that uses a BY statement expects its
   input to be ordered by the same BY variable(s). */
proc sort data=atifraza.CPI;
   by country;
run;

/* Transpose within each country (one BY group per country). */
proc transpose data=atifraza.CPI out=ATIFRAZA.TRANSCPI;
   by COUNTRY;
run;

/* Rename the columns generated by PROC TRANSPOSE (COL1, _NAME_). */
data ATIFRAZA.PANELCPI;
   set ATIFRAZA.TRANSCPI (rename=(COL1=CPI _NAME_=YEARS));
run;
/* Time-series example: keep only the 'PAK' rows, then compute the first
   difference and the one-period lag of CPI.  The filter runs before DIF/LAG,
   so only the retained rows enter their queues. */
data ATIFRAZA.ESPAK;
   set ATIFRAZA.PANELCPI;
   if COUNTRY ne 'PAK' then delete;
   DCPI_PK = dif(CPI);
   LAG_PK  = lag(CPI);
run;

/* Panel-data lags with PROC PANEL (original author's note: does not work in
   the offline version, only in the cloud version). */
proc panel data=atifraza.PANELCPI;
   id COUNTRY YEARS;
   lag CPI(1) / out=ATIFRAZA.LAGPANEL;
run;

/* Rank the cars by Invoice in descending order; tied values share a rank
   and no rank numbers are skipped (TIES=DENSE). */
proc rank data=SASHELP.CARS out=WORK.RANKINVOICE descending ties=dense;
   var Invoice;
   ranks rank_Invoice;
run;
/* ************************************************************************* */
/* Summary statistics for IPAS, DPAS, JS and TI.  The output table keeps the
   maximum of each variable together with the ORG and GND values of the
   observation on which that maximum occurs (MAXID). */
proc sort data=RAAWTHES.thesisdata;
   by ORG;
run;

proc means data=RAAWTHES.THESISDATA maxdec=3;
   var IPAS DPAS JS TI;
   /* The OUTPUT statement previously had no terminating semicolon, so the
      following RUN was parsed as part of it and the step failed to compile. */
   output out=atifraza.IDpractice
      max=MAX_IPAS MAX_DPAS MAX_JS MAX_TI
      maxid(IPAS(ORG GND) DPAS(ORG GND) JS(ORG GND) TI(ORG GND))=
         HiIPASO HiIPASG HiDPASO HiDPASG HiJSO HiJSG HiTIO HiTIG;
run;
/* A wider selection of PROC MEANS statistics for the same four variables. */
title 'Resquestion Assorted Statistics';
proc means data=RAAWTHES.THESISDATA
           maxdec=3 nmiss range uss css t prt sumwgt skewness kurtosis;
   var IPAS DPAS JS TI;
run;

/* User-defined numeric format that labels the codes 1, 2 and 3. */
proc format;
   value age
      1 = 'young'
      2 = 'old'
      3 = 'veryold';
run;
/* Two numeric columns with some missing values, read from comma-delimited
   records. */
data abc;
   infile datalines delimiter=',' dsd;
   input A B;
   datalines;
1,2
2,1
.,2
.,.
1,1
2,1
;
run;

/* Cross-tabulation of A by B. */
title '2 WAY CONTINGENCY TABLE';
proc freq data=abc;
   tables a*b;
run;
/* ************************************************************************* */
/* Means of IPAS, DPAS, JS and TI for every Gender x Organization class
   combination, saved to Thesis.Thesis_Desc. */
proc sort data=thesis.thesisdata;
   by gender;
run;

proc means data=thesis.thesisdata maxdec=3;
   class Gender Organization;
   var IPAS DPAS JS TI;
   output out=Thesis.Thesis_Desc
      mean=Avg_IPA Avg_DPAS Avg_JS Avg_TI;
run;

/* Per-organization maxima, plus the Gender of the observation holding each
   maximum.  BY processing needs the data sorted by Organization first. */
proc sort data=thesis.thesisdata;
   by organization;
run;

proc means data=thesis.thesisdata;
   by Organization;
   var IPAS DPAS JS TI;
   output out=Thesis.max2
      max=Mx_IPAS Mx_DPAS Mx_JS Mx_TI
      maxid(IPAS(Gender) DPAS(Gender) JS(Gender) TI(Gender))=
         MaxIneff MaxDiss MaxJS MaxTI;
run;
/* Per-organization minima, plus the Gender of the observation holding each
   minimum.  The statistic keyword is MIN: the step previously requested MAX
   while naming the results Mn_*, pairing them with MINID and storing them in
   min2, so the "minimum" columns actually held maxima. */
proc means data=thesis.thesisdata;
   var IPAS DPAS JS TI;
   by Organization;
   output out=Thesis.min2
      min=Mn_IPAS Mn_DPAS Mn_JS Mn_TI
      minid(IPAS(Gender) DPAS(Gender) JS(Gender) TI(Gender))=
         MinIneff MinDiss MinJS MinTI;
run;
/* ************************************************************************* */
/* Column-input example.  Field layout expected by the INPUT statement:
      IdNumber 1-4, Name 6-23, Team 25-33, SWeight 35-37, EWeight 39-41,
      Date read with MMDDYY9. from column 42 onward.
   The records are aligned to that layout; with single-blank separation the
   Team, weight and date fields were read from the wrong columns. */
data ATIFRAZA.datasheet1;
   input IdNumber 1-4 Name $ 6-23 Team $ 25-33 SWeight 35-37 EWeight 39-41
         Date mmddyy9.;
   format Date EURDFDE9.;
   datalines;
1111 Muhammad Mursaleen HR        100 150 101812
1112 Beenish            Finance   099 130 101712
1113 Awais              Marketing 125 131 101416
1114 Saad               Finance   200 210 101416
1115 Nasrullah          Finance   139 199 100116
;
run;
/* ************************************************************************* */
/* Mixed column/list input: Name comes from columns 6-24, the remaining
   fields are blank-delimited.
   - The records are aligned so that Name really occupies columns 6-24;
     with single-blank separation every name shorter than the field pulled
     the following values into Name.
   - Team is read with the :$9. informat because plain "$" list input gives a
     length of 8 and would truncate "Marketing".
   NOTE(review): this step writes to the same table as the preceding step
   (ATIFRAZA.datasheet1) and therefore replaces it. */
data ATIFRAZA.datasheet1;
   input IdNumber 1-4 Name $ 6-24 Team :$9. SWeight EWeight;
   datalines;
0001 Muhammad Mursaleen  HR        100 150
0002 Beenish             Finance   120 130
0003 Awais               Marketing 125 131
0004 Saad                Finance   191 210
0005 Nasrullah           Finance   139 199
;
run;
/* ************************************************************************* */
/* Mean of FinalGrade for each status x year class combination, written to
   Mean_Grade without printed output. */
proc means data=atifraza.grade noprint;
   class status year;
   var FinalGrade;
   output out=Mean_Grade
      mean=Avg_Grade;
run;
/* Mean, standard deviation, minimum and maximum of invoice by type and make.
   The summary is written to Cars_Desc: it was previously written to
   Cake_Desc, which the cake summary that follows overwrites, so the car
   statistics were lost as soon as the next step ran. */
proc sort data=atifraza.cars;
   by make type;
run;

proc means data=atifraza.cars noprint;
   var invoice;
   class type make;
   output out=Cars_Desc
      mean=Avg_Price std=SD_Price min=Minimum_Price max=Maximum_Price;
run;
/* Mean and standard deviation of the two cake scores per flavor. */
proc means data=atifraza.cake;
   class flavor;
   var PresentScore TasteScore;
   output out=Cake_Desc
      mean=Avg_PS Avg_TS
      std=SD_PS SD_TS;
run;

/* Sum of the two cake scores per flavor (no printed output). */
proc means data=atifraza.cake noprint;
   class flavor;
   var PresentScore TasteScore;
   output out=Cake_Sum
      sum=S_PAS S_TS;
run;

/* Means of IPAS, DPAS, JS and TI by Gender and Organization, saved to
   Thesis.libkaam. */
proc sort data=thesis.thesisdata;
   by gender;
run;

proc means data=thesis.thesisdata maxdec=3;
   class Gender Organization;
   var IPAS DPAS JS TI;
   output out=Thesis.libkaam
      mean=Avg_IPA Avg_DPAS Avg_JS Avg_TI;
run;
/* Home-ownership sample read with simple list input, followed by income
   statistics for each Age x HomeOwn class combination. */
data ATIFRAZA.ThirdLecture;
   input Name $ HomeOwn $ Age Income;
   datalines;
rodrick n 1 30000
smith n 2 25400
freiss y 1 42000
garcia y 1 42000
williams n 2 14000
mason n 2 20000
lopez n 2 18500
gregory n 1 30000
reid n 1 27000
schulman y 1 35000
garrett y 1 32000
zingraff y 2 35000
;
run;

/* The table just created is the most recent one, so naming it explicitly
   selects the same input that an omitted DATA= option would. */
proc means data=ATIFRAZA.ThirdLecture;
   class age homeown;
   var income;
run;
/* ************************************************************************* */
/* Height/weight sample table.  The in-stream data is now closed with a null
   statement and RUN; previously the data block ran straight into a MODEL
   statement, leaving the DATA step unterminated and the MODEL/OUTPUT
   statements outside any procedure. */
data BookCode.GAINS;
   infile datalines dsd dlm=',';
   input NAME $ HEIGHT WEIGHT;
   datalines;
ALFRED,69,122.5
ALICIA,56.5,84
BARBARA,65.3,98
BANNETT,63.2,96.2
CAROL,62.8,102.5
;
run;

/* Regression of WEIGHT on SYSTOLIC, DIASTOLIC and HEIGHT, saving the
   diagnostics used by the following PROC PRINT steps: studentized residual
   (R), leverage (LEV), Cook's D (CD) and DFFITS (DFFIT).
   NOTE(review): the PROC REG statement was missing; PRACTICE.HEART is used
   because the later influence step runs this same model on that table -
   confirm this is the intended input. */
proc reg data=PRACTICE.HEART;
   model WEIGHT = SYSTOLIC DIASTOLIC HEIGHT;
   output out=PRACTICE.DIAG (keep=WEIGHT HEIGHT SYSTOLIC DIASTOLIC R LEV CD DFFIT)
      rstudent=R h=LEV cookd=CD dffits=DFFIT;
run;
/* Print observations whose leverage exceeds the cut-off (2k+2)/n, with
   k = 3 predictors and n = 5209.  The whole numerator must be divided by n:
   the earlier expression 2*3+2/5209 evaluated to about 6, which no leverage
   value can exceed, so nothing was ever printed. */
proc print data=PRACTICE.DIAG;
   where LEV > ((2*3+2)/5209);
run;
/* Observations whose studentized residual exceeds the cut-off of 2 in
   absolute value, so that large negative residuals are included too. */
proc print data=PRACTICE.DIAG;
   where abs(R) > 2;
run;

/* Observations whose Cook's D exceeds the cut-off 4/n (n = 5209). */
proc print data=PRACTICE.DIAG;
   where CD > (4/5209);
run;

/* Observation-by-observation influence statistics for the same model. */
proc reg data=PRACTICE.HEART plots(maxpoints=none);
   model WEIGHT = SYSTOLIC DIASTOLIC HEIGHT / influence;
run;

/* Add a running observation number to serve as a unique ID.  The sum
   statement OBS+1 retains its value across iterations and starts from 0. */
data PRACTICE.HEART;
   set PRACTICE.HEART;
   OBS + 1;
run;
/* Dummy-variable example on the iris data. */
data PRACTICE.IRIS;
   set SASHELP.IRIS;
run;

proc print data=PRACTICE.IRIS;
run;

proc reg data=PRACTICE.IRIS;
   model SEPALWIDTH = SEPALLENGTH;
run;

/* A comparison evaluates to 1 when true and 0 when false, which gives the
   two 0/1 indicators directly. */
data PRACTICE.IRISD;
   set PRACTICE.IRIS;
   DSETOSA = (SPECIES = 'Setosa');
   DVERSI  = (SPECIES = 'Versicolor');
run;

proc print data=PRACTICE.IRISD;
run;

proc reg data=PRACTICE.IRISD;
   model SEPALWIDTH = DSETOSA DVERSI;
run;
/* ---------------------------- PROGRAMMING ---------------------------- */
/* Adding a constant with the + operator: the result is missing whenever
   AIRCOST is missing. */
data TOURS2;
   set PRACTICE.TOURS;
   TOTALCOST = AIRCOST + 20;
run;

/* Same addition with SUM(), which ignores missing arguments. */
data TOURS2;
   set PRACTICE.TOURS;
   TC = sum(AIRCOST, 20);
run;
/* IF / ELSE IF / ELSE: describe the bonus rule for each vendor.
   BONUS is declared with an explicit length.  Without it the variable takes
   its length from the first value assigned ('No Bonus '), which truncated
   'All People' and 'FOR 5+ People'. */
data TOURS3;
   set PRACTICE.TOURS;
   length BONUS $ 13;
   if VENDORS = 'hispania' then BONUS = 'No Bonus';
   else if VENDORS = 'major' then BONUS = 'All People';
   else BONUS = 'FOR 5+ People';
run;
/* Variant that sets NO_BONUS for one vendor and BONUS for the others.
   Both variables are declared up front: BONUS otherwise takes length 3 from
   its first value ('YES'), which truncated 'Dont know' to 'Don'. */
data TOURS3;
   set PRACTICE.TOURS;
   length NO_BONUS $ 3 BONUS $ 9;
   if VENDORS = 'hispania' then NO_BONUS = 'YES';
   else if VENDORS = 'major' then BONUS = 'YES';
   else BONUS = 'Dont know';
run;
/* Drop the rows of one vendor. */
data TOURS;
   set PRACTICE.TOURS;
   if VENDORS ne 'hispania';
run;

/* Basic arithmetic on the two cost columns.  SQUARE holds the square root
   of AIRCOST rounded to the nearest multiple of 5. */
data TOURS;
   set PRACTICE.TOURS;
   TOTALCOST = AIRCOST + LANDCOST;
   TOT_COST  = sum(AIRCOST, LANDCOST);
   MULTI     = AIRCOST * LANDCOST;
   DIVIDE    = AIRCOST / LANDCOST;
   SQUARE    = round(sqrt(AIRCOST), 5);
run;

/* Round NIGHTS to the nearest multiple of 5. */
data TOURS;
   set PRACTICE.TOURS;
   NIGHT = round(NIGHTS, 5);
run;
/* Flag tours whose backup guide equals the tour guide, or where either
   guide is blank.
   - Each guide is tested for blank explicitly.  The earlier condition
     "BACKUPGUIDE OR TOURGUIDE=''" used a character variable as a boolean,
     which forces a character-to-numeric conversion instead of a blank test.
   - REMARKS gets an explicit length so 'CHECK STATUS' is not truncated to
     the length of the first value assigned, and the literal is no longer
     split across two source lines.
   NOTE(review): when both guides are blank the first branch matches and the
   row is marked 'Problem' - confirm that is the intended precedence. */
data AIRTOUR;
   set PRACTICE.AIRTOUR;
   length REMARKS $ 12;
   if BACKUPGUIDE = TOURGUIDE then REMARKS = 'Problem';
   else if missing(BACKUPGUIDE) or missing(TOURGUIDE) then REMARKS = 'CHECK STATUS';
   else REMARKS = 'OK';
run;
/* Extract one piece of a character variable with SCAN, using a comma as
   the delimiter: PART1 is the second comma-separated piece. */
data AIRTOUR;
   set PRACTICE.AIRTOUR;
   PART1 = scan(EVENTDESCRIPTION, 2, ',');
run;

/* Same extraction, with the result right-aligned by RIGHT(). */
data AIRTOUR;
   set PRACTICE.AIRTOUR;
   PART1 = right(scan(EVENTDESCRIPTION, 2, ','));
run;
/* Concatenating several variables and literal text into a new variable. */

/* Plain concatenation: the trailing blanks that pad TOURGUIDE stay in the
   middle of the result. */
data AIRTOUR;
   set PRACTICE.AIRTOUR;
   ALLGUIDES = TOURGUIDE || ' and ' || BACKUPGUIDE;
run;

/* Trimmed concatenation.  TRIM is applied to TOURGUIDE itself: wrapping the
   whole expression in TRIM only removed blanks at the very end, which the
   assignment pads back again, so the gap after the first name remained. */
data AIRTOUR;
   set PRACTICE.AIRTOUR;
   ALLGUIDES = trim(TOURGUIDE) || ' and ' || BACKUPGUIDE;
run;
/* Classify each tour by LANDCOST.  The missing-value test comes first
   because a missing value would otherwise satisfy "LANDCOST < 500". */
data AIRTOUR;
   set PRACTICE.AIRTOUR;
   select;
      when (LANDCOST = .)    TYPE = 'Haaw Hayee';
      when (LANDCOST < 500)  TYPE = 'Sasta';
      when (LANDCOST < 1000) TYPE = 'Guzara';
      otherwise              TYPE = 'Mehnga';
   end;
run;

/* 28-12-2016 */
proc print data=PRACTICE.AIRTOUR;
run;
/* Label each tour from the range LANDCOST falls in.
   NOTE(review): everything outside 500-1500, including values above 1500
   and missing values, ends up in the final ELSE and is labelled 'Low'. */
data AIRTOUR;
   set PRACTICE.AIRTOUR;
   if (500 <= LANDCOST <= 1000) then TYPE = 'Medium';
   else if (1000 < LANDCOST <= 1500) then TYPE = 'Costly';
   else TYPE = 'Low';
run;

/* Compound condition combining OR groups with AND. */
data AIRTOUR;
   set PRACTICE.AIRTOUR;
   if (NIGHTS > 3 or NUMBEROFEVENTS > 5)
      and (TOURGUIDE = 'lucas' or CITY = 'paris')
      then TYPE = 'Ha Ha Ha';
   else TYPE = 'He He';
run;
/* Use a numeric variable directly as a condition: it is true when the value
   is neither missing nor zero.
   REMARKS is declared with an explicit length; it previously took length 3
   from its first value ('OK '), which truncated 'MISSING' to 'MIS'.
   NOTE(review): a LANDCOST of exactly 0 is also labelled 'MISSING' by this
   test - switch to MISSING(LANDCOST) if zero costs can occur. */
data AIRTOUR;
   set PRACTICE.AIRTOUR;
   length REMARKS $ 7;
   if LANDCOST then REMARKS = 'OK';
   else REMARKS = 'MISSING';
run;
/* Exact, case-sensitive comparison. */
data AIRTOUR;
   set PRACTICE.AIRTOUR;
   if TOURGUIDE = 'Lucas' then TYPE = 'GROUP A';
   else TYPE = 'GROUP B';
run;

/* Case-insensitive comparison: upper-case the value before comparing. */
data AIRTOUR;
   set PRACTICE.AIRTOUR;
   if upcase(TOURGUIDE) = 'LUCAS' then TYPE = 'GROUP A';
   else TYPE = 'GROUP B';
run;

/* Prefix comparison: the colon modifier compares only as many characters as
   the shorter operand has, so this matches every value starting with 'L'. */
data AIRTOUR;
   set PRACTICE.AIRTOUR;
   if TOURGUIDE = :'L' then TYPE = 'GROUP A';
   else TYPE = 'GROUP B';
run;

/* Prefix comparison with ">": values whose first letter sorts after 'L'
   (M, N, O and so on). */
data AIRTOUR;
   set PRACTICE.AIRTOUR;
   if TOURGUIDE > :'L' then TYPE = 'GROUP A';
   else TYPE = 'GROUP B';
run;

/* Substring search: INDEX returns the position of the text, or 0 when it is
   absent, so it can be used directly as a condition. */
data AIRTOUR;
   set PRACTICE.AIRTOUR;
   if index(EVENTDESCRIPTION, 'other') then DOUBT = 'YES';
   else DOUBT = 'NO';
run;

/* Substring search regardless of case: upper-case the text first. */
data AIRTOUR;
   set PRACTICE.AIRTOUR;
   if index(upcase(EVENTDESCRIPTION), 'OTHER') then DOUBT = 'YES';
   else DOUBT = 'NO';
run;
/* Keep only the observations with NIGHTS >= 6 in AIRTOUR2. */
data AIRTOUR2;
   set PRACTICE.AIRTOUR;
   if NIGHTS < 6 then delete;
run;

/* Route each observation to one of two tables with IF / ELSE. */
data AIRTOUR1 AIRTOUR2;
   set PRACTICE.AIRTOUR;
   if NIGHTS >= 6 then output AIRTOUR1;
   else output AIRTOUR2;
run;

/* Two independent tests: an observation can be written to both tables, to
   one of them, or to neither. */
data AIRTOUR1 AIRTOUR2;
   set PRACTICE.AIRTOUR;
   if NIGHTS >= 6 then output AIRTOUR1;
   if TOURGUIDE = 'Lucas' then output AIRTOUR2;
run;

/* Split into two tables and add a new variable to both.  TOTALNIGHTS is
   computed before either OUTPUT statement, so both tables contain it. */
data AIRTOUR1 AIRTOUR2;
   set PRACTICE.AIRTOUR;
   TOTALNIGHTS = NIGHTS + 1;
   if TOURGUIDE = 'Lucas' then output AIRTOUR1;
   else output AIRTOUR2;
run;

proc sort data=PRACTICE.airtour;
   by CITY;
run;
/* Copy the cars data and sort it by TYPE for the FIRST./LAST. processing
   done in the next step. */
data PRACTICE.CARS;
   set SASHELP.CARS;
run;

proc sort data=PRACTICE.CARS;
   by TYPE;
run;

/* BY ORIGIN TYPE needs input ordered by ORIGIN and then TYPE.  The table
   above is ordered by TYPE only, which made PROC MEANS stop with a
   "not sorted" error, so a separately sorted work copy is summarised and
   PRACTICE.CARS keeps its TYPE order. */
proc sort data=PRACTICE.CARS out=WORK.CARS_BY_ORIGIN;
   by ORIGIN TYPE;
run;

proc means data=WORK.CARS_BY_ORIGIN;
   by ORIGIN TYPE;
run;
/* Store the automatic FIRST.TYPE / LAST.TYPE flags in real variables so
   they are kept in the output table. */
data CARS;
   set PRACTICE.CARS;
   by TYPE;
   FIRSTTYPE = first.TYPE;
   LASTTYPE  = last.TYPE;
run;

/* For the first observation of each TYPE group, build a text that shows
   which type and model it is. */
data CARS2;
   set CARS;
   if FIRSTTYPE = 1 then
      CARSTYPE = TYPE || ' ' || 'START' || MODEL;
run;

proc print data=CARS2;
run;
/* Import the discount workbook, replacing the table if it already exists. */
proc import datafile="/folders/myfolders/PRACTICE/DISCOUNT.xlsx"
            out=PRACTICE.DISCOUNT
            dbms=XLSX
            replace;
run;

/* Remove duplicate observations while sorting by CITY; the de-duplicated
   result goes to PRACTICE.ABC and the input table is left unchanged. */
proc sort data=PRACTICE.AIRTOUR out=PRACTICE.ABC noduprecs;
   by CITY;
run;
/* Combining tables. */

/* Concatenate: all rows of DATA6 followed by all rows of DATA7. */
data NEW;
   set PRACTICE.DATA6
       PRACTICE.DATA7;
run;

/* Interleave: rows of both tables combined in YEAR order. */
data NEW;
   set PRACTICE.DATA6
       PRACTICE.DATA7;
   by YEAR;
run;

/* One-to-one merge: rows are paired by position. */
data MERGED;
   merge PRACTICE.DATA6
         PRACTICE.DATA7;
run;

/* Match-merge: rows are paired on YEAR. */
data MERGED;
   merge PRACTICE.DATA6
         PRACTICE.DATA7;
   by YEAR;
run;

/* One-to-one merge of two class lists; NAME from the second table is
   renamed so that it does not overwrite NAME from the first. */
data ONE2ONE;
   merge PRACTICE.CLASS  (drop=YEAR MAJOR)
         PRACTICE.CLASS2 (drop=YEAR MAJOR rename=(NAME=NAME2));
run;

/* Match-merge of the finance and company tables, first on NAME ... */
data FINCOM;
   merge PRACTICE.FINANCE
         PRACTICE.COMPANY;
   by NAME;
run;

/* ... and then on IDNUMBER. */
data FINCOM;
   merge PRACTICE.FINANCE
         PRACTICE.COMPANY;
   by IDNUMBER;
run;
/* Match-merge the shoes with their discounts by TYPE.
   The spelling of TYPE is normalised BEFORE sorting.  Recoding after the
   sort, as was done previously, changes BY values in an already ordered
   table and can leave it out of order for the merge. */
data PRACTICE.SHOES;
   set PRACTICE.SHOES;
   if TYPE = 'C-train' then TYPE = 'C-Train';
run;

proc sort data=PRACTICE.SHOES;
   by TYPE;
run;

proc sort data=PRACTICE.DISCOUNT;
   by TYPE;
run;

data SHOESS;
   merge PRACTICE.SHOES PRACTICE.DISCOUNT;
   by TYPE;
run;
/* Merge the two tables and derive new variables in the merged result:
   the discount amount and the price after discount. */
data SHOESS;
   merge PRACTICE.SHOES
         PRACTICE.DISCOUNT;
   by TYPE;
   DAMOUNT  = ADJUSTMENT * REGULARPRICE;
   NETPRICE = REGULARPRICE - DAMOUNT;
run;

/* Summarise NETPRICE per TYPE and store the result in NETMEAN.
   NOTE(review): the statistic stored is the SUM (variable SUMM), although
   the table name suggests a mean - confirm which one is wanted. */
proc means data=SHOESS;
   by TYPE;
   var NETPRICE;
   output out=NETMEAN
      sum(NETPRICE)=SUMM;
run;

/* Join the summary back onto the shoe table by TYPE, dropping the
   bookkeeping columns that PROC MEANS adds. */
data SHOESS (drop=_TYPE_ _FREQ_);
   merge PRACTICE.SHOES
         NETMEAN;
   by TYPE;
run;
/* A double dash names a range of variables by their position in the table:
   every variable from ITEM13 through ITEM26. */
data PRACTICE.PCA3 (keep=ITEM13--ITEM26);
   set PRACTICE.PCA2;
run;

/* Principal components analysis: two factors, varimax rotation; the factor
   scores are saved under descriptive names. */
proc factor data=PRACTICE.PCA3
            method=PRIN
            priors=ONE
            nfactors=2
            rotate=VARIMAX
            simple
            round
            scree
            flag=.4
            out=PRACTICE.PCARESULTS (rename=(FACTOR1=INS_CHAR
                                             FACTOR2=INS_STU_REL));
run;
/* E . X . T . R . A . */
/* Student marks read with column input.  Field layout expected by INPUT:
      NAME 1-21, DATE read with MMDDYY10. from columns 22-31,
      ENGLISH 32-34, MATHS 36-38, SCIENCE 41-43.
   The records are aligned to that layout; with single-blank separation the
   name field ran into the date and the marks were read from the wrong
   columns. */
data PRACTICE.NAMES;
   input NAME $ 1-21 DATE mmddyy10. ENGLISH 32-34 MATHS 36-38 SCIENCE 41-43;
   format DATE date8.;
   datalines;
Muhammad Atif Raza    10282016 059 075  088
Muhamad Mursaleen Ali 12252015 063 077  090
Chaudhry Awais Khalid 05152014 055 085  065
Miss Beenish Bashir   10132014 085 086  075
Muhammad Saad Baloch  07272015 055 069  099
Raja Mustansar Ali    08282017 051 059  075
;
run;
/* Derive a "Last, Middle" style name, the total marks, the percentage and a
   grade for every student.
   GRADE is declared with length 4: it previously took length 1 from its
   first value ('A'), which truncated 'FAIL' to 'F'. */
data SCANN;
   set PRACTICE.NAMES;
   /* Negative positions make SCAN count words from the right. */
   APA        = scan(NAME, -1, ' ') || ', ' || scan(NAME, -2, ' ');
   TOTALMARKS = ENGLISH + MATHS + SCIENCE;
   PERCENTAGE = round((TOTALMARKS / 300) * 100, .01);
   length GRADE $ 4;
   if PERCENTAGE >= 80 then GRADE = 'A';
   else if PERCENTAGE >= 70 then GRADE = 'B';
   else if PERCENTAGE >= 60 then GRADE = 'C';
   else GRADE = 'FAIL';
run;
/* Descriptive look at COM.CARS. */
proc contents data=COM.CARS;
run;

/* Selected statistics for every ORIGIN x TYPE class combination. */
proc means data=COM.CARS mean cv nmiss skewness kurtosis maxdec=2;
   class ORIGIN TYPE;
run;

/* BY-group statistics need the table sorted by the BY variable. */
proc sort data=COM.CARS;
   by ORIGIN;
run;

proc means data=COM.CARS;
   by ORIGIN;
run;

/* Two-way table of ORIGIN by TYPE with chi-square tests. */
proc freq data=COM.CARS;
   table ORIGIN*TYPE / chisq;
run;

proc univariate data=COM.CARS;
   var INVOICE;
run;

/* Largest and smallest INVOICE, each with the TYPE, ORIGIN and MAKE of the
   observation it belongs to. */
proc means data=COM.CARS;
   var INVOICE;
   output out=COM.CARS3
      max=MX_INCOICE
      maxid(INVOICE(TYPE ORIGIN MAKE))=MaxPrice MaxOrigin MaxMake
      min=MIN_INCOICE
      minid(INVOICE(TYPE ORIGIN MAKE))=MinPrice MinOrigin MinMake;
run;
/* Correlation analyses.
   The scatter-plot matrix request and the MAXPOINTS= global plot option are
   written as one PLOTS(global-options)=request specification instead of two
   separate PLOTS options on the same statement, so both settings are certain
   to apply to the same request. */
proc corr data=COM.CARS plots(maxpoints=none)=matrix(histogram);
run;

/* Cronbach's alpha, using only observations with no missing values. */
proc corr data=COM.CARS alpha nomiss;
run;

proc corr data=COM.HEART;
run;

proc corr data=COM.HEART plots(maxpoints=none)=matrix(histogram);
   var Cholesterol AgeAtStart Height Weight Diastolic;
run;
/* Regression of Cholesterol on four predictors. */
proc reg data=COM.HEART plots;
   model Cholesterol = AgeAtStart Height Weight Diastolic;
run;

/*
   INTERPRETATION
   - Slope of 1.2 for age (in years): a one-year increase in age raises
     cholesterol by 1.2 units.  Null hypothesis: age at start has no effect
     on cholesterol.
   - F value of 127 with a p-value < .001: the model is fit.  Null
     hypothesis: the model is not fit.
   - R-square: only 9% of the variability in cholesterol is explained by the
     independent variables.
   - Intercept of 194: if all independent variables are zero, cholesterol
     will be at 194 units.
*/

/* Same model, saving the diagnostics: studentized residual (R), leverage
   (LEV), Cook's D (CD) and DFFITS (DFFIT). */
proc reg data=COM.HEART plots;
   model Cholesterol = AgeAtStart Height Weight Diastolic;
   output out=COM.DIAG (keep=CHOLESTEROL AGEATSTART HEIGHT WEIGHT DIASTOLIC
                             R LEV CD DFFIT)
      rstudent=R
      h=LEV
      cookd=CD
      dffits=DFFIT;
run;
/* Outliers: |studentized residual| above 2. */
proc print data=COM.DIAG;
   where abs(R) > 2;
run;

/* High leverage: above (2k+2)/n with k = 4 and n = 5209. */
proc print data=COM.DIAG;
   where LEV > ((2*4+2)/5209);
run;

/* Observations that are both outliers and high-leverage points. */
proc print data=COM.DIAG;
   where abs(R) > 2 and LEV > ((2*4+2)/5209);
run;

/* Refit using the observations that fail at least one of the two tests. */
proc reg data=COM.DIAG plots;
   where abs(R) < 2 or LEV < ((2*4+2)/5209);
   model Cholesterol = AgeAtStart Height Weight Diastolic;
run;

/* DFFITS beyond the cut-off 2*sqrt(4/5209): list them, then refit without. */
proc print data=COM.DIAG;
   where abs(DFFIT) > (2*sqrt(4/5209));
   var Cholesterol AgeAtStart Height Weight Diastolic;
run;

proc reg data=COM.DIAG plots;
   where abs(DFFIT) < (2*sqrt(4/5209));
   model Cholesterol = AgeAtStart Height Weight Diastolic;
run;

/* Cook's D beyond the cut-off 4/5209: list them, then refit without. */
proc print data=COM.DIAG;
   where CD > 4/5209;
   var Cholesterol AgeAtStart Height Weight Diastolic;
run;

proc reg data=COM.DIAG plots;
   where CD < 4/5209;
   model Cholesterol = AgeAtStart Height Weight Diastolic;
run;

/* Influence statistics per observation.
   NOTE(review): the OUTPUT statement names no statistic keywords, so
   COM.DFBETAS presumably receives no diagnostic columns - verify. */
proc reg data=COM.heart;
   model Cholesterol = AgeAtStart Height Weight Diastolic / influence;
   output out=COM.DFBETAS;
run;
/* One 0/1 indicator per car TYPE.  A comparison evaluates to 1 when true
   and 0 when false. */
data COM.DUMMY;
   set COM.CARS;
   DSUV    = (TYPE = 'SUV');
   DSEDAN  = (TYPE = 'Sedan');
   DSPORTS = (TYPE = 'Sports');
   DHYBRID = (TYPE = 'Hybrid');
   DTRUCK  = (TYPE = 'Truck');
   DWAGON  = (TYPE = 'Wagon');
run;

/* Regression without an intercept, so all six indicators can be included. */
proc reg data=COM.DUMMY;
   model INVOICE = WEIGHT DSUV DSEDAN DSPORTS DHYBRID DTRUCK DWAGON / noint;
run;

/*
   INTERPRETATION
   - A change of 1 lb in weight produces a 15.2 $ increase (+) in invoice.
   - Null hypothesis: invoice behaves the same across the types.  If any of
     the dummies is significant we can conclusively say that type matters in
     the determination of price.
   - Regression equation for wagon: Y = -25738 + 15.2 (W), where W is the
     weight of the car.
*/
/* Copy the survey items into the COM library and inspect the table. */
data COM.PCA;
   set PRACTICE.PCA2;
run;

proc contents data=COM.PCA;
run;

/* Principal components on ITEM13--ITEM52 with varimax rotation; the five
   factor scores are saved under descriptive names. */
proc factor data=COM.PCA
            method=PRIN
            priors=ONE
            mineigen=1
            nfactors=5
            rotate=VARIMAX
            simple
            round
            scree
            flag=.4
            out=COM.PCARESULTS (rename=(FACTOR1=CUL_DIME
                                        FACTOR2=RACISMM
                                        FACTOR3=INSTRUCTOR
                                        FACTOR4=INS_STU
                                        FACTOR5=BLA_BLA));
   var ITEM13--ITEM52;
run;

/* Second run with PRIORS=MAX, also printing the correlation and residual
   matrices. */
proc factor data=COM.PCA
            method=P
            priors=MAX
            scree
            corr
            res;
   var ITEM13--ITEM52;
run;
/* ---------------------------- PROGRAMMING ---------------------------- */
/* Add a constant with the + operator (missing AIRCOST gives a missing
   result). */
data NEW;
   set PRACTICE.TOURS;
   TOTALCOST = AIRCOST + 20;
run;

/* Add a constant with SUM(), which ignores missing arguments. */
data NEW;
   set PRACTICE.TOURS;
   TOTALCOST = sum(AIRCOST, 20);
run;

/* Set one flag variable or the other depending on the vendor. */
data NEW;
   set PRACTICE.TOURS;
   if VENDORS = 'hispania' then BONUS = 'YES';
   else if VENDORS = 'major' then NOBONUS = 'YES';
run;

/* Same step repeated. */
data NEW;
   set PRACTICE.TOURS;
   if VENDORS = 'hispania' then BONUS = 'YES';
   else if VENDORS = 'major' then NOBONUS = 'YES';
run;
/* One REMARKS text per vendor.
   REMARKS is declared with an explicit length; it previously took its
   length from the first value assigned ('BONUS '), which truncated
   'NO BONUS' and 'FORGET IT'. */
data NEW;
   set PRACTICE.TOURS;
   length REMARKS $ 9;
   if VENDORS = 'hispania' then REMARKS = 'BONUS';
   else if VENDORS = 'major' then REMARKS = 'NO BONUS';
   else REMARKS = 'FORGET IT';
run;
/* Drop the rows of one vendor. */
data NEW;
   set PRACTICE.TOURS;
   if VENDORS ne 'hispania';
run;

/* Basic arithmetic on the two cost columns. */
data NEW;
   set PRACTICE.TOURS;
   TOTALCOST = AIRCOST + LANDCOST;
   MINUS     = AIRCOST - LANDCOST;
   MULTIPLY  = AIRCOST * LANDCOST;
   DIVIDE    = AIRCOST / LANDCOST;
   ADD       = sum(AIRCOST, LANDCOST);
run;

/* Round NIGHTS to the nearest multiple of 5. */
data NEW;
   set PRACTICE.TOURS;
   NIGHTR = round(NIGHTS, 5);
run;

/* Round the total cost to the nearest multiple of 5. */
data NEW;
   set PRACTICE.TOURS;
   NIGHTR = round(sum(LANDCOST, AIRCOST), 5);
run;

/* Flag identical or blank guides. */
data NEW;
   set PRACTICE.AIRTOUR;
   if TOURGUIDE = BACKUPGUIDE then REMARKS = 'PROBLEM';
   else if TOURGUIDE = ' ' or BACKUPGUIDE = ' ' then REMARKS = 'CHECK';
   else REMARKS = 'OK';
run;

/* First comma-separated piece of the description. */
data NEW;
   set PRACTICE.AIRTOUR;
   PART1 = scan(EVENTDESCRIPTION, 1, ',');
run;

/* Second comma-separated piece, right-aligned. */
data NEW;
   set PRACTICE.AIRTOUR;
   PART2 = right(scan(EVENTDESCRIPTION, 2, ','));
run;
/* Plain concatenation: the blanks that pad TOURGUIDE stay between the two
   names. */
data NEW;
   set PRACTICE.AIRTOUR;
   ALLTOURS = TOURGUIDE || BACKUPGUIDE;
run;

/* Trimmed concatenation with a separator.  TRIM is applied to TOURGUIDE
   itself: wrapping the whole expression in TRIM only removed blanks at the
   very end, which the assignment pads back again, so the gap before the
   comma remained. */
data NEW;
   set PRACTICE.AIRTOUR;
   ALLTOURS = trim(TOURGUIDE) || ', ' || BACKUPGUIDE;
run;
/* Compound condition combining OR groups with AND. */
data NEW;
   set PRACTICE.AIRTOUR;
   if (NIGHTS > 3 or NUMBEROFEVENTS > 5)
      and (TOURGUIDE = 'Lucas' or CITY = 'Paris')
      then TYPE = 'HaHaHa';
   else TYPE = 'NaNaNa';
run;

/* A numeric variable used as a condition is true when it is neither missing
   nor zero. */
data NEW;
   set PRACTICE.AIRTOUR;
   if LANDCOST then REMARKS = 'NON MISSING';
   else REMARKS = 'MISSING';
run;

/* Exact, case-sensitive comparison. */
data AIRTOUR;
   set COM.AIRTOUR;
   if TOURGUIDE = 'lucas' then GROUP = 'A';
   else GROUP = 'B';
run;

/* Case-insensitive comparison via UPCASE. */
data AIRTOUR;
   set COM.AIRTOUR;
   if upcase(TOURGUIDE) = 'LUCAS' then GROUP = 'A';
   else GROUP = 'B';
run;

/* Prefix comparison with the colon modifier: backup guides starting
   with 'D'. */
data ABC;
   set COM.AIRTOUR;
   if BACKUPGUIDE = :'D' then SELECT = 'YES';
   else SELECT = 'NO';
run;

/* Substring search with INDEX (returns 0 when the text is absent). */
data ABC;
   set COM.AIRTOUR;
   if index(EVENTDESCRIPTION, 'other') then STATUS = 'check';
   else STATUS = 'ok';
run;
/* Find the position of a given letter.  EVENT holds the position of the
   first 'M' in EVENTDESCRIPTION, or 0 when there is none.
   The EVENT assignment is now terminated with a semicolon; without it the
   following RUN was parsed as part of the expression and the step failed to
   compile. */
data ABC;
   set COM.AIRTOUR;
   if index(EVENTDESCRIPTION, 'other') then STATUS = 'check';
   else STATUS = 'ok';
   EVENT = index(EVENTDESCRIPTION, 'M');
run;
/* Split the tours into two tables: Lucas's tours go to ABC, all others
   to DEF. */
data ABC DEF;
   set COM.AIRTOUR;
   select (TOURGUIDE);
      when ('Lucas') output ABC;
      otherwise      output DEF;
   end;
run;
/* Sort a copy of the cars by MAKE, then flag the first and last observation
   of every MAKE group.
   The DATA step reads the sorted copy (DEF).  It previously read COM.CARS,
   which this sort leaves untouched, so BY MAKE was applied to a table that
   is not in MAKE order. */
proc sort data=COM.CARS out=DEF;
   by MAKE;
run;

data ABC;
   set DEF;
   by MAKE;
   ONE = first.MAKE;
   TWO = last.MAKE;
run;
/* Concatenate ABC and DEF (rows of ABC first). */
data ABC;
   set ABC
       DEF;
run;

/* One-to-one merge of the same two tables (rows paired by position). */
data MM;
   merge ABC
         DEF;
run;

/* Factor analysis on ITEM13--ITEM20 with squared multiple correlations as
   prior communalities and a promax rotation. */
proc factor data=PRACTICE.PCA3
            method=P
            priors=SMC
            rotate=PROMAX
            scree
            round
            flag=.4
            corr
            res;
   var ITEM13--ITEM20;
run;

/* Correlations with Cronbach's alpha for the variables FACNAT through
   DEGREE. */
proc corr data=PRACTICE.PCA alpha;
   var FACNAT--DEGREE;
run;
/* Build the modelling table: origin indicators plus the natural log of
   INVOICE.  A comparison evaluates to 1 when true and 0 when false. */
data COM.CARS;
   set SASHELP.CARS;
   DASIA    = (ORIGIN = 'Asia');
   DEUROPE  = (ORIGIN = 'Europe');
   LINVOICE = log(INVOICE);
run;

proc corr data=COM.CARS;
run;

proc reg data=COM.CARS;
   model LINVOICE = MPG_CITY WEIGHT DASIA DEUROPE;
run;

/*
   RESULTS
   Intercept      = 9.7
   MPG_CITY slope = -0.024
   WEIGHT         = 0.00022
   A 1 lb increase in the weight of a car increases the car price by 0.00022.
   Europe's price is greater than USA's price by 0.51 units w.r.t. the
   intercept.
*/

/* Same model, saving leverage (LEV), studentized residual (RES), DFFITS
   (DFFIT) and Cook's D (CD). */
proc reg data=COM.CARS;
   model LINVOICE = MPG_CITY WEIGHT DASIA DEUROPE;
   output out=COM.RES (keep=LINVOICE MPG_CITY WEIGHT DASIA DEUROPE
                            LEV RES CD DFFIT)
      h=LEV
      rstudent=RES
      dffits=DFFIT
      cookd=CD;
run;

/* Outliers. */
proc print data=COM.RES;
   where abs(RES) > 2;
run;

/* High-leverage points: cut-off (2k+2)/n with k = 4, n = 428. */
proc print data=COM.RES;
   where LEV > ((2*4)+2)/428;
run;

/* Both at once. */
proc print data=COM.RES;
   where abs(RES) > 2 and LEV > ((2*4)+2)/428;
run;

/* Running observation number, used as the ID in the influence listing. */
data COM.RES;
   set COM.RES;
   OBS + 1;
run;

proc reg data=COM.RES;
   id OBS;
   model LINVOICE = MPG_CITY WEIGHT DASIA DEUROPE / influence;
   output out=COM.INFLU;
run;

/* Save the predicted values next to the model variables. */
proc reg data=COM.CARS;
   model LINVOICE = MPG_CITY WEIGHT DASIA DEUROPE;
   output out=NAYADATASETNAME (keep=PINVOICE LINVOICE MPG_CITY WEIGHT
                                    DASIA DEUROPE)
      predicted=PINVOICE;
run;

/* Squared prediction error of each observation divided by 428. */
data ABC;
   set NAYADATASETNAME;
   RMS = ((LINVOICE - PINVOICE) * (LINVOICE - PINVOICE)) / 428;
run;
/* Strip the magnitude suffixes from Google_pluses: Num has every 'M'
   removed, NumNew additionally has every 'K' removed.
   The first COMPRESS call no longer ends with a stray comma (an empty third
   argument). */
data hw.web1;
   set hw.web;
   Num    = compress(Google_pluses, 'M');
   NumNew = compress(Num, 'K');
run;
/* Remove every alphabetic character from GOOGLE_PLUSES: the 'A' modifier
   adds all letters to the (empty) list of characters to remove. */
data WEB;
   set COM.WEB;
   b = compress(GOOGLE_PLUSES, '', 'A');
   /* Unfinished idea, left disabled: isolate the suffix letter and scale
      the number accordingly.
   c = compress(GOOGLE_PLUSES, b);
   IF C='M' THEN GP=B*1000000;
   ELSE IF C='K' THEN GP=B*1000;
   ELSE IF C='' THEN GP=B; */
run;