PROC SQL By Becky Leung Alberta Health Services Calgary SAS User Group Meeting Wednesday, October 08, 2014
WHAT IS PROC SQL? A Base SAS procedure that combines the functionality of DATA and PROC steps into a single step
WHAT CAN PROC SQL DO? Sort, summarize, subset, join (merge), and concatenate datasets Create new variables and print the results or create a new table or view within one step Retrieve, update and report on information from SAS datasets
SYNTAX PROC SQL; CREATE TABLE table-name | VIEW view-name AS SELECT column(s) FROM table-name | view-name WHERE expression GROUP BY column(s) HAVING expression ORDER BY column(s) ; QUIT;
MERGING TABLES IN PROC SQL We can merge two tables together within one step We can merge three or more tables together within one step
MERGING TWO TABLES
TABLE A A 1 2 3 4 B TABLE B B 1 2 3 4 C MERGE: ONE TO ONE
TABLE A TABLE B A B B C 1 2 3 4 1 1 1 2 2 3 MERGE: ONE TO MANY 4 TABLE A TABLE B C B B A 1 1 1 2 1 2 3 4 2 3 4 MERGE: MANY TO ONE
TABLE A TABLE B A B B C 1 1 2 2 2 3 4 4 1 1 1 2 2 3 4 MERGE: MANY TO MANY
SYNTAX PROC SQL; CREATE TABLE table-name | VIEW view-name AS SELECT column(s) FROM table-name | view-name alias1 JOIN table-name | view-name alias2 ON alias1.variable-name EQ alias2.variable-name ; QUIT;
TABLE A: HOSPITAL_ADMISSION OBS # PATIENT_ID HOSPITAL_ADMISSION_DATE 1 1 01JAN13 2 1 03FEB13 3 2 10JAN13 4 2 10MAR13 5 2 13SEP13 6 3 02APR13 7 4 15FEB13 8 4 09DEC13 TABLE B: PATIENT_DEMOGRAPHICS OBS # PATIENT_ID BIRTHDATE CITY HOUSEHOLD_INCOME 1 1 01FEB53 CALGARY 60,000 2 2 22OCT43 CALGARY 100,000 3 3 12MAR43 CALGARY 25,000 4 4 01JUN44 EDMONTON 0 5 5 03JAN44 EDMONTON 130,000
1 3 5 2 4 6 7
SYNTAX MERGING TWO TABLES PROC SQL; CREATE TABLE PATIENT_INFO1 AS SELECT A.*, B.BIRTHDATE FROM HOSPITAL_ADMISSION A LEFT JOIN PATIENT_DEMOGRAPHICS B ON A.PATIENT_ID EQ B.PATIENT_ID; QUIT;
RESULT TABLE: PATIENT_INFO1 OBS # PATIENT_ID HOSPITAL_ADMISSION_DATE BIRTHDATE 1 1 1-Jan-13 1-Feb-53 2 1 3-Feb-13 1-Feb-53 3 2 10-Jan-13 22-Oct-43 4 2 13-Sep-13 22-Oct-43 5 2 10-Mar-13 22-Oct-43 6 3 2-Apr-13 12-Mar-43 7 4 15-Feb-13 1-Jun-44 8 4 9-Dec-13 1-Jun-44
SYNTAX MERGING TWO TABLES PROC SQL; CREATE TABLE PATIENT_INFO2 AS SELECT A.*, B.BIRTHDATE FROM HOSPITAL_ADMISSION A RIGHT JOIN PATIENT_DEMOGRAPHICS B ON A.PATIENT_ID EQ B.PATIENT_ID; QUIT;
RESULT TABLE: PATIENT_INFO2 OBS # PATIENT_ID HOSPITAL_ADMISSION_DATE BIRTHDATE 1 1 1-Jan-13 1-Feb-53 2 1 3-Feb-13 1-Feb-53 3 2 10-Jan-13 22-Oct-43 4 2 13-Sep-13 22-Oct-43 5 2 10-Mar-13 22-Oct-43 6 3 2-Apr-13 12-Mar-43 7 4 15-Feb-13 1-Jun-44 8 4 9-Dec-13 1-Jun-44 9 . . 3-Jan-44
SYNTAX MERGING TWO TABLES PROC SQL; CREATE TABLE PATIENT_INFO3 AS SELECT B.*, A.HOSPITAL_ADMISSION_DATE FROM HOSPITAL_ADMISSION A RIGHT JOIN PATIENT_DEMOGRAPHICS B ON A.PATIENT_ID EQ B.PATIENT_ID; QUIT;
RESULT TABLE: PATIENT_INFO3 OBS # PATIENT_ID BIRTHDATE CITY HOUSEHOLD_INCOME HOSPITAL_ADMISSION_DATE 1 1 1-Feb-53 CALGARY 60000 1-Jan-13 2 1 1-Feb-53 CALGARY 60000 3-Feb-13 3 2 22-Oct-43 CALGARY 100000 10-Jan-13 4 2 22-Oct-43 CALGARY 100000 13-Sep-13 5 2 22-Oct-43 CALGARY 100000 10-Mar-13 6 3 12-Mar-43 CALGARY 25000 2-Apr-13 7 4 1-Jun-44 EDMONTON 0 15-Feb-13 8 4 1-Jun-44 EDMONTON 0 9-Dec-13 9 5 3-Jan-44 EDMONTON 130000 .
SYNTAX MERGING TWO TABLES PROC SQL; CREATE TABLE PATIENT_INFO3A AS SELECT B.*, A.HOSPITAL_ADMISSION_DATE FROM HOSPITAL_ADMISSION A RIGHT JOIN PATIENT_DEMOGRAPHICS B ON A.PATIENT_ID EQ B.PATIENT_ID WHERE HOUSEHOLD_INCOME GT 0; QUIT;
RESULT TABLE: PATIENT_INFO3A OBS # PATIENT_ID BIRTHDATE CITY HOUSEHOLD_INCOME HOSPITAL_ADMISSION_DATE 1 1 1-Feb-53 CALGARY 60000 1-Jan-13 2 1 1-Feb-53 CALGARY 60000 3-Feb-13 3 2 22-Oct-43 CALGARY 100000 10-Jan-13 4 2 22-Oct-43 CALGARY 100000 13-Sep-13 5 2 22-Oct-43 CALGARY 100000 10-Mar-13 6 3 12-Mar-43 CALGARY 25000 2-Apr-13 7 5 3-Jan-44 EDMONTON 130000 .
SYNTAX MERGING TWO TABLES PROC SQL; CREATE TABLE PATIENT_INFO4 AS SELECT A.PATIENT_ID AS PATIENT_ID_A LABEL="PATIENT ID A", B.PATIENT_ID AS PATIENT_ID_B LABEL="PATIENT ID B", HOSPITAL_ADMISSION_DATE LABEL="HOSPITAL ADMISSION DATE", B.BIRTHDATE LABEL="PATIENT BIRTHDATE" FORMAT=DATE11. FROM HOSPITAL_ADMISSION A INNER JOIN PATIENT_DEMOGRAPHICS B ON A.PATIENT_ID EQ B.PATIENT_ID; QUIT;
RESULT TABLE: PATIENT_INFO4 OBS # PATIENT ID A PATIENT ID B HOSPITAL ADMISSION DATE PATIENT BIRTHDATE 1 1 1 1-Jan-13 1-Feb-1953 2 1 1 3-Feb-13 1-Feb-1953 3 2 2 10-Jan-13 22-Oct-1943 4 2 2 13-Sep-13 22-Oct-1943 5 2 2 10-Mar-13 22-Oct-1943 6 3 3 2-Apr-13 12-Mar-1943 7 4 4 15-Feb-13 1-Jun-1944 8 4 4 9-Dec-13 1-Jun-1944
SYNTAX MERGING TWO TABLES PROC SQL; CREATE TABLE PATIENT_INFO5 AS SELECT A.PATIENT_ID AS PATIENT_ID_A LABEL="PATIENT ID A", B.PATIENT_ID AS PATIENT_ID_B LABEL="PATIENT ID B", HOSPITAL_ADMISSION_DATE LABEL="HOSPITAL ADMISSION DATE", B.BIRTHDATE LABEL="PATIENT BIRTHDATE" FORMAT=DATE11., FLOOR((HOSPITAL_ADMISSION_DATE - BIRTHDATE)/365) AS AGE LABEL="PATIENT AGE" FROM HOSPITAL_ADMISSION A FULL OUTER JOIN PATIENT_DEMOGRAPHICS B ON A.PATIENT_ID EQ B.PATIENT_ID; QUIT;
RESULT TABLE: PATIENT_INFO5 OBS # PATIENT ID A PATIENT ID B HOSPITAL ADMISSION DATE PATIENT BIRTHDATE PATIENT AGE 1 1 1 1-Jan-13 1-Feb-1953 59 2 1 1 3-Feb-13 1-Feb-1953 60 3 2 2 10-Jan-13 22-Oct-1943 69 4 2 2 13-Sep-13 22-Oct-1943 69 5 2 2 10-Mar-13 22-Oct-1943 69 6 3 3 2-Apr-13 12-Mar-1943 70 7 4 4 15-Feb-13 1-Jun-1944 68 8 4 4 9-Dec-13 1-Jun-1944 69 9 . 5 . 3-Jan-1944 .
REFERENCES http://www2.sas.com/proceedings/sugi27/p191-27.pdf
THANK YOU