/*----------------------------------------------------------------------------- File: MergeLinkPlusLinkageFiles.sas Author: John Sabel john.sabel@ofm.wa.gov Washington State Education Research & Data Center Creation date: 2/27/12 Version: 0.9 Purpose For a given export directory, this program concatenates all the Link Plus exported "View" files for a data file to data file linkage project. It will not combine export files from a deduplication project. It filters the files in the export directory based on a ".txt" file extension. Therefore, this program will throw an error if it encounters in the export directory a ".txt" file that is NOT a export file. Parameters DirIn: The directory path to the exported Link Plus views. This directory should only contain the export files that you want to link together. DsnOut: What you want to call the final two-level dataset name. OnlyGoodLinks: (Optional) Set to Y means only links with that have a Match_Status = 1 will be kept, otherwise, all linkages, including those with a Match_Status of 2 and 3 will be kept. Default is Y. Debug: (Optional) Y = Don't delete intermidiate datasets. N = Delete intermidiate datasets. Default is N Note Running this macro with the "OnlyGoodLinks" parameter set to N results in a SAS data set that contains every linkage record, whether they are flagged "accept", "uncertain" or "reject." Running the following SQL statement will result in a nice report that can give you information that will help you to set cutoff values for future linking runs using similar data: proc sql; title 'FirstBatch Statistics'; SELECT class, count(*) AS TotalMatches, sum(case when Match_Status = 1 then 1 else 0 end) as AcceptedMatches, sum(case when Match_Status = 2 then 1 else 0 end) as RejectedMatches, sum(case when Match_Status = 3 then 1 else 0 end) as UncertainMatches, min(case when Match_Status = 1 then (score - (class * 100)) end) AS MinScoreAccepted, max(case when Match_Status <> 1 then (score - (class * 100)) end) AS MaxScoreNotAccepted FROM <> GROUP BY class ORDER BY class ; quit; This SQL statement assumes that the Link Plus report data had been first modified by the program ModifyLinkPlusReport.sas (The default implentation of ModifyLinkPlusReport.sas modified the score variable). ------------------------------------------------------------------------------- Copyright © 2012 Washington State Office of Financial Management This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. -----------------------------------------------------------------------------*/ %macro MergeLinkPlusLinkageFiles(DirIn=, DsnOut=, OnlyGoodLinks=Y, Debug=N); %put --- Start of %upcase(&sysmacroname) macro; %local err or; %let err=ERR; %let or=OR; %if "&debug" ^= "Y" and "&debug" ^= "N" %then %do; %put &err&or: (&sysmacroname.) Invalid value for the macro parameter DEBUG.; %put &err&or: (&sysmacroname) Value should be either N or Y.; %goto exit; %end; %local i; filename dircmd pipe "dir /b &DirIn."; data __Dirfiles; length outfile $200; Keep outfile; infile dircmd missover length=length; input @; input bigline $varying200. length; outfile = strip(bigline); if substr(strip(reverse(outfile)), 1, 3) = 'txt'; /* We only want .txt files */ run; /* Create a list of export files, and a list of what will be the SAS dataset names for these files. Because export files names could begin with a number, the SAS dataset names will all be prepended by the characters "__". */ proc sql noprint; SELECT outfile, "__" || substr(outfile,1,length(outfile)-4) INTO :RawDataNames separated by ' ', :SasDataNames separated by ' ' FROM __Dirfiles ; %let numfiles = &sqlobs.; quit; %if &numfiles.= 0 %then %do; %put &err&or: (&sysmacroname.) The directory "&DirIn." does not contain .txt files.; %goto exit; %end; /* Create datasets from all the Link Plus Export files */ %FetchLinkPlusLinkageExportFile(FileIn=&DirIn.\%scan(&RawDataNames.,1,%str( )),DsnOut=%scan(&SasDataNames.,1,%str( )),OnlyGoodLinks=&OnlyGoodLinks.) %if %sysfunc(exist(%scan(&SasDataNames.,1,%str( ))))=0 %then %do; %put &err&or: (&sysmacroname.) The dataset "%scan(&SasDataNames.,&i.,%str( ))" does not exist.; %put &err&or: (&sysmacroname) This macro will now terminate.; %goto exit; %end; data &DsnOut.; set %scan(&SasDataNames.,1,%str( )); run; %do i = 2 %to &numfiles.; %FetchLinkPlusLinkageExportFile(FileIn=&DirIn.\%scan(&RawDataNames.,&i.,%str( )),DsnOut=%scan(&SasDataNames.,&i.,%str( )),OnlyGoodLinks=&OnlyGoodLinks.) /* Triggered if macro FetchLinkPlusLinkageExportFile() does not return a SAS data set. */ %if %sysfunc(exist(%scan(&SasDataNames.,&i.,%str( ))))=0 %then %do; %put &err&or: (&sysmacroname.) The dataset "%scan(&SasDataNames.,&i.,%str( ))" does not exist.; %put &err&or: (&sysmacroname) This macro will now terminate.; %goto exit; %end; proc append base=&DsnOut. data=%scan(&SasDataNames.,&i.,%str( )); run; %end; %exit: %if &debug. = N %then %do; proc datasets lib=work nolist; delete __Dirfiles &SasDataNames.; run; quit; %end; %else %do; %put _user_; %end; %put --- End of %upcase(&sysmacroname) macro; %mend MergeLinkPlusLinkageFiles;