/*------------------------------------------------------------------------------ File: RemoveUnusedColumns.sas Author: John Sabel john.sabel@ofm.wa.gov Washington State Education Research & Data Center Creation date: 2/27/13 Version: 0.9 Purpose This macro analyzes an inputted data set for columns that have no data. It then outputs a second data in which the columns without data have been removed. By using the optional AlwaysKeep parameter, certain columns can always be kept, even if they do not contain data. Parameters DsnIn: The name of the inputted data set. DsnOut: The name of the outputted data set. AlwaysKeep: (Optional) Space delimited names of columns that are always kept in DsnOut, even when they contain no data. AlwaysDrop: (Optional) Space delimited names columns that are always dropped. This parameter can be used in lieu of a DROP statement in the DsnIn statement. Unlike using a drop statement, This list can contain column names that are not actually in DsnIn. Debug (Optional) When Debug=Y, data is outputted to output. Signature %RemoveUnusedColumns(DsnIn=, DsnOut=, AlwaysKeep=, AlwaysDrop=, Debug=N) Revisions: Date Version Author Comments ------------------------------------------------------------------------------*/ %macro RemoveUnusedColumns(DsnIn=, DsnOut=, AlwaysKeep=, AlwaysDrop=, Debug=N); %put --- Start of %upcase(&sysmacroname.) macro; %local ReturnColumns; %local ColumnsDropped; %let ColumnsDropped=; %local warn ing; %let warn=WARN; %let ing=ING; %let AlwaysKeep=%trim(%upcase(&AlwaysKeep.)); %let AlwaysDrop=%trim(%upcase(&AlwaysDrop.)); proc sql %if &Debug.=N %then %do; noprint %end;; CREATE TABLE __RemoveSchemaDsnIn LIKE &DsnIn.; title "All Columns in DsnIn"; SELECT Name INTO :AllColNames1 - :AllColNames999 FROM DICTIONARY.COLUMNS WHERE UPCASE(LIBNAME) ="WORK" AND UPCASE(MEMNAME) = "%upcase(__RemoveSchemaDsnIn)" AND UPCASE(NAME) NOT IN ("%sysfunc(tranwrd(&AlwaysDrop.,%str( ), %bquote(",")))") ORDER BY NPOS ; %let NumVars = &sqlobs.; title "Columns in DsnIn, Minus the Columns in AlwaysKeep"; SELECT Name INTO :SelectColNames SEPARATED BY " " FROM DICTIONARY.COLUMNS WHERE UPCASE(LIBNAME) ="WORK" AND UPCASE(MEMNAME) = "%upcase(__RemoveSchemaDsnIn)" AND UPCASE(NAME) NOT IN ("%sysfunc(tranwrd(&AlwaysKeep.,%str( ), %bquote(",")))") ; %let SelectColNames=&SelectColNames.; title "Total Number of Records in DsnIn"; SELECT COUNT(*) INTO :TotalRecordCount FROM &DsnIn. ; quit; %let CommaDelimiter=; %do i = 1 %to &NumVars.; %if %length(%trim(&ReturnColumns.)) > 0 %then %do; %let CommaDelimiter=,; %end; %if %sysfunc(indexw(&SelectColNames., &&AllColNames&I.))=0 %then %do; %let ReturnColumns = %bquote(&ReturnColumns.&CommaDelimiter.&&AllColNames&I.); %end; %else %do; proc sql noprint; title "Number of Not Null Records for &&AllColNames&I."; SELECT COUNT(*) INTO :NullRecordCount FROM &DsnIn. WHERE &&AllColNames&I. IS NULL ; quit; %if &NullRecordCount.<&TotalRecordCount. %then %do; %let ReturnColumns = %bquote(&ReturnColumns.&CommaDelimiter.&&AllColNames&I.); %end; %end; %end; %if &TotalRecordCount.>0 & %length(%trim(&ReturnColumns.)) > 0 %then %do; /* DsnIn not only has records, but at least one of these records has (non-null) data.*/ proc sql; CREATE TABLE &DsnOut. AS SELECT &ReturnColumns. FROM &DsnIn. ; CREATE TABLE __RemoveSchemaDsnOut LIKE &DsnOut.; quit; proc sql noprint; CREATE TABLE __RemoveColumnsDeleted AS SELECT DsnInColumns.NAME /* INTO :ColumnsDropped SEPARATED BY ", "*/ FROM ( SELECT NAME, NPOS FROM DICTIONARY.COLUMNS WHERE UPCASE(LIBNAME) ="WORK" AND UPCASE(MEMNAME) = "%upcase(__RemoveSchemaDsnIn)" ) DsnInColumns LEFT JOIN ( SELECT NAME FROM DICTIONARY.COLUMNS WHERE UPCASE(LIBNAME) ="WORK" AND UPCASE(MEMNAME) = "%upcase(__RemoveSchemaDsnOut)" ) DsnOutColumns ON DsnInColumns.Name = DsnOutColumns.Name WHERE DsnOutColumns.Name IS NULL ORDER BY DsnInColumns.NPOS ; SELECT Name INTO :ColumnsDropped SEPARATED BY ", " FROM __RemoveColumnsDeleted ; quit; %if &Debug=N %then %do; proc sql; DROP TABLE __RemoveSchemaDsnIn, __RemoveColumnsDeleted, __RemoveSchemaDsnOut; quit; %end; %if %LENGTH(&ColumnsDropped.)=0 %then %do; %put NOTE: All columns in &DsnIn. not in the AlwaysKeep parameter contain non-null data. Therefore, DsnOut (&DsnOut.) is a copy of DsnIn (&DsnIn.).; %end; %else %do; %put NOTE: The following were dropped from DsnOut (&DsnOut.): &ColumnsDropped..; %end; %end; %else %do; %if &TotalRecordCount.=0 %then %do; %put &warn&ing: &DsnIn. has no records.; %end; %else %do; %put &warn&ing: All data in &DsnIn. is null.; %end; %put &warn&ing: The data set &DsnOut. was not created.; /* Here is the situation: DsnIn has records, but all the data is null. Yet from some previous SAS routine, there might be a pre-existing DsnOut. Normally, this DsnOut would be overwritten. But since there is nothing to overwrite it with, DsnOut is not overwritten. However, this macro can't just let this stray DsnOut to continue to exist, because that would imply this macro had created it. So if there is a stray pre-existing DsnOut, it is deleted. */ %if %sysfunc(exist(&DsnOut.)) %then %do; proc sql; DROP TABLE &DsnOut.; quit; %put &warn&ing: The pre-existing data set &DsnOut. has been deleted.; %end; %end; %if &Debug.~=N %then %do; %put Listing of local macro variables; %put _local_; %end; %put --- End of %upcase(&sysmacroname.) macro; %mend RemoveUnusedColumns; /* * Sample data; data SampleAnimalData; input ID Salary Animal $ Sound $; datalines; 1 . Cow Moo 2 . Fish . 3 . Bird Tweet 4 . Lamb Baaa! ; run; * In this example, Salary is always null. But since it is listed in the "AlwaysKeep" parameter, it is included in the outputted data set, SampleAnimalData_Out.; %RemoveUnusedColumns(DsnIn=SampleAnimalData, DsnOut=SampleAnimalData_Out, AlwaysKeep=Salary, Debug=N) * This example shows how the AlwaysDrop parameter can be a list of columns that are always dropped when they exist in DsnIn. "CrepesSuzzette" is an example of a column name that does not exist in DsnIn. Its presence has no effect on the execution of the macro. It is simply ignored.; %RemoveUnusedColumns(DsnIn=SampleAnimalData, DsnOut=SampleAnimalData_OutNoSound, AlwaysKeep=Salary NonSense, AlwaysDrop=Sound CrepesSuzzette, Debug=Y) data SampleOnlyNullData; x=.; y=''; run; * This example shows that if DsnIn has records, then DsnOut is not created. %RemoveUnusedColumns(DsnIn=SampleOnlyNullData, DsnOut=SampleOnlyNullData_Out, AlwaysKeep=, Debug=N) data SampleNoDataWhatSoEver; length x 8. y $12; run; * This example shows that if DsnIn has no records, then DsnOut is is not created. %RemoveUnusedColumns(DsnIn=SampleNoDataWhatSoEver, DsnOut=SampleNoDataWhatSoEver_Out, AlwaysKeep=, Debug=N) data SampleNoDataWhatSoEver; length x 8. y $12; run; data SampleNoDataWhatSoEver_Out; x=5; run; * This example shows that if DsnIn contains no records, yet DsnOut was created previously, DsnOut is deleted.; %RemoveUnusedColumns(DsnIn=SampleNoDataWhatSoEver, DsnOut=SampleNoDataWhatSoEver_Out, AlwaysKeep=, Debug=N) */