/*-----------------------------------------------------------------------------
File:          ReadInLinkPlusReport.sas
Author:        John Sabel
               john.sabel@ofm.wa.gov
               Washington State Education Research & Data Center
Creation date: 1/31/12
Version:       0.9

Purpose
This program reads in the Link Plus report file. The program splits the
report file into two data sets: 1) the header portion (metadata) of the
report file, and 2) the data itself.

Note
By default, character variables read with list input are eight characters
long. If your dataset has variables longer than eight characters, and these
variables will be used in "ModifyLinkPlusReport.sas", then you will need to
modify this program to read in the full length of these variables. Examples
of how to modify this program are the tranwrd() calls that rewrite input_1
and input_2 in the first DATA step. For example, this program modifies
different permutations of variables for first, middle and last name so as to
allow for 20 character field lengths.

Also, this program uses the function FixIdVariables(). The definition of
FixIdVariables is in a separate program, "CreateLinkPlusFunctions.sas",
available on the ERDC website. So you will have to download and run
"CreateLinkPlusFunctions.sas" before you can run this program. You only need
to run "CreateLinkPlusFunctions.sas" once, and then FixIdVariables() will be
stored permanently in SASUSER.FCMP.LINKPLUS.

Every time this program is run, it checks to see if SAS has a reference to
the package SASUSER.FCMP. If it doesn't, it adds a reference. The program,
and the resulting dependency on FixIdVariables(), was written to facilitate
ease of first-time use. In more formal code, you could remove the check (the
%if block at the top of the macro that tests the CMPLIB option), and in an
autoexec file, add the reference to the package using an options statement
(i.e. "options cmplib=SASUSER.FCMP").

Input Parameters:
LinkRptPathIn:  Path of an unmodified Link Plus report.

Output Parameters:
MetadataDsnOut: The metadata header of a Link Plus report as a SAS dataset.
ReportDsnOut:   The data section of a Link Plus report as a SAS dataset.

Debug Parameter:
Y = Don't delete intermediate dataset "__MetaDataHeader."
N = Delete "__MetaDataHeader."
Default is N. The value is compared case-insensitively.

Macro Variables Outputted (global)
put_1, put_2:   A tab delimited list of Link Plus report variables. Used in
                the creation of a modified Link Plus report. The clerical
                review function of Link Plus doesn't actually use any of
                these variables directly from the report. Instead, the
                clerical review function pulls in these variables from the
                original ASCII files you used as input to Link Plus. If you
                want to keep report size down, and you don't care to view it
                yourself, then you could use the macro variable "justtabs"
                in "AssembleLinkPlusReport.sas."
justtabs        A tab delimited list of nothing. This can be used in lieu of
                put_1 and put_2 above when you don't care to put in the
                unneeded variables when you create a modified Link Plus
                report in "AssembleLinkPlusReport.sas."

-------------------------------------------------------------------------------
Copyright © 2012 Washington State Office of Financial Management

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-----------------------------------------------------------------------------*/

%macro ReadInLinkPlusReport(LinkRptPathIn=, MetadataDsnOut=, ReportDsnOut=, Debug=N);

   %put --- Start of %upcase(&sysmacroname) macro;

   /* These variables are used in AssembleLinkPlusReport.sas, so they must
      outlive this macro. justtabs is documented as an output as well; without
      the %global declaration CALL SYMPUTX would create it in the local symbol
      table of this macro and it would vanish at %mend. */
   %global put_1 put_2 justtabs;

   /* Working variables. Declaring them local keeps CALL SYMPUTX from
      overwriting same-named variables in an outer scope, and guarantees that
      HeaderLineNum is empty (not stale) when no header line is found. */
   %local HeaderLineNum StartingObs input_1 input_2 length_1 length_2;

   /* Accept Debug=y / Debug=n as well as the upper-case forms. */
   %let Debug = %upcase(&Debug.);

   /* Checks to see if a reference already exists to the location of
      FixIdVariables(). If the reference doesn't exist, then it adds it.
      The option value is upper-cased so the test does not depend on how the
      option was originally typed. */
   %if %index(%upcase(%sysfunc(getoption(cmplib))), SASUSER.FCMP) = 0 %then %do;
      options append=(CMPLIB=SASUSER.FCMP);
   %end;

   /* This data step captures metadata into a set of macro variables from the
      header of the Link Plus report file. Only the first 50 records are
      scanned for the column-heading line that starts with "Linkage ID". */
   /* NOTE(review): variables that first receive a PRXCHANGE result (dataline3
      onward, length_1, input_1, ...) get the default length of 200 bytes, so
      very wide reports could be truncated here - confirm against real data. */
   data __MetaDataHeader;
      infile "&LinkRptPathIn." length=len obs=50 dsd;
      length put_1 $300 put_2 $300;
      retain HeaderLineNum 0;

      input dataline : $varying300. len @;
      HeaderLineNum = HeaderLineNum + 1;

      if substr(dataline,1,10)='Linkage ID' then do;

         /* Remove blanks and "-" characters, so that "Linkage ID" becomes
            "LinkageID", etc. */
         dataline2 = strip(compress(compress(dataline, "-")));

         /* For deduplication reports: strip out LinkageID, SetID, Record# */
         dataline3=prxchange('s/LinkageID\tSetID\tRecord#\t//',-1, dataline2);

         /* For linking reports: strip out LinkageID, File, Record# */
         dataline4=prxchange('s/LinkageID\tFile\tRecord#\t//',-1, dataline3);

         /* Strip out Class, Score */
         dataline5=prxchange('s/\tClass\tScore//',-1,dataline4);

         /* Create new variable names by appending "_1" to remaining variable
            names */
         length_1 = prxchange('s/(\w+)/$1_1/', -1, dataline5);
         length_1 = FixIdVariables(length_1);

         /* Create new variable names by appending "_2" to remaining variable
            names */
         length_2 = prxchange('s/(\w+)/$1_2/', -1, dataline5);
         length_2 = FixIdVariables(length_2);

         %if &Debug. = Y %then %do;
            put length_1 =;
            put length_2 =;
            put;
         %end;

         /* Add "$" in between every variable name. Used in the "input"
            statement below. */
         input_1 = prxchange('s/1\t/1 \$ /', -1, length_1);
         input_1 = prxchange('s/2\t/2 \$ /', -1, input_1);
         /* Add "$" after last variable name */
         input_1 = compbl(input_1 || ' $');

         input_2=compbl(prxchange('s/1\t/1 \$ /', -1, length_2));
         input_2=compbl(prxchange('s/2\t/2 \$ /', -1, input_2));
         /* Add "$" after last variable name */
         input_2 = compbl(input_2 || ' $');

         /* Create put statement fragments from raw input statement fragments.
            Changes all "$" that are for an input statement to tabs for use in
            the put statements used to output the modified report. However,
            these variable names and data are NOT used by Link Plus when it
            generates Clerical Review views. It obtains the data for the views
            by pulling it from the source data file. So if you don't care
            about having these variables in your modified report, use the
            "justtabs" variable in the final put statements. */
         put_1 = prxchange("s/ \$ / '09'x /", -1, input_1);
         put_2 = prxchange("s/ \$ / '09'x /", -1, input_2);
         justtabs = prxchange('s/(\w+)//', -1, length_1);
         justtabs = prxchange("s/\t/ '09'x /", -1, justtabs);

         %if &Debug. = Y %then %do;
            put put_1 =;
            put put_2 =;
            put;
            put justtabs =; /* Only produces (NumberColumns -1) tabs */
            put;
         %end;

         /* A kludge: every "_2" inside the record-1 fragment and every "_1"
            inside the record-2 fragment is replaced by "_kludge".
            NOTE(review): the reason is not documented in this file. */
         input_1 = tranwrd(input_1, '_2', '_kludge');
         input_2 = tranwrd(input_2, '_1', '_kludge');

         /* Modify the input statements to take in the full 9 character SSN. */
         /* Parameters to tranwrd() are case sensitive. */
         input_1 = tranwrd(input_1, 'ssn_1 $', 'ssn_1 : $9.');
         input_2 = tranwrd(input_2, 'ssn_2 $', 'ssn_2 : $9.');

         /* Modify input statements to take in full 20 character of first and
            last names that are in ERDC data. */
         input_1 = tranwrd(input_1, 'LASTNAME_1 $', 'LASTNAME_1 : $20.');
         input_2 = tranwrd(input_2, 'LASTNAME_2 $', 'LASTNAME_2 : $20.');
         input_1 = tranwrd(input_1, 'FIRSTNAME_1 $', 'FIRSTNAME_1 : $20.');
         input_2 = tranwrd(input_2, 'FIRSTNAME_2 $', 'FIRSTNAME_2 : $20.');
         input_1 = tranwrd(input_1, 'lname_1 $', 'lname_1 : $20.');
         input_2 = tranwrd(input_2, 'lname_2 $', 'lname_2 : $20.');
         input_1 = tranwrd(input_1, 'fname_1 $', 'fname_1 : $20.');
         input_2 = tranwrd(input_2, 'fname_2 $', 'fname_2 : $20.');
         input_1 = tranwrd(input_1, 'LastName_1 $', 'LastName_1 : $20.');
         input_2 = tranwrd(input_2, 'LastName_2 $', 'LastName_2 : $20.');
         input_1 = tranwrd(input_1, 'FirstName_1 $', 'FirstName_1 : $20.');
         input_2 = tranwrd(input_2, 'FirstName_2 $', 'FirstName_2 : $20.');
         input_1 = tranwrd(input_1, 'last_name_1 $', 'last_name_1 : $20.');
         input_2 = tranwrd(input_2, 'last_name_2 $', 'last_name_2 : $20.');
         input_1 = tranwrd(input_1, 'first_name_1 $', 'first_name_1 : $20.');
         input_2 = tranwrd(input_2, 'first_name_2 $', 'first_name_2 : $20.');
         input_1 = tranwrd(input_1, 'middle_name_1 $', 'middle_name_1 : $20.');
         input_2 = tranwrd(input_2, 'middle_name_2 $', 'middle_name_2 : $20.');

         /* Modify input statements to take in the full 16 characters of the
            DistrictStudentCode from the K-12 data. */
         input_1 = tranwrd(input_1, 'DistrictStudentCode_1 $', 'DistrictStudentCode_1 : $16.');
         input_2 = tranwrd(input_2, 'DistrictStudentCode_2 $', 'DistrictStudentCode_2 : $16.');

         /* Modify input statements to take in full 11 digit rec_key used in
            hospitalization data */
         input_1 = tranwrd(input_1, 'REC_KEY_1 $', 'REC_KEY_1 : $11.');
         input_2 = tranwrd(input_2, 'REC_KEY_2 $', 'REC_KEY_2 : $11.');

         %if &Debug. = Y %then %do;
            put input_1 =;
            put input_2 =;
            put;
            put HeaderLineNum =;
            call symputx('length_1',length_1);
            call symputx('length_2',length_2);
         %end;

         /* input_1 and input_2 are referenced by the report-reading DATA step
            below on every run, so they must be exported regardless of the
            Debug setting. */
         call symputx('input_1',input_1);
         call symputx('input_2',input_2);

         call symputx('put_1',put_1);
         call symputx('put_2',put_2);
         call symputx('justtabs',justtabs);
         call symputx('HeaderLineNum',HeaderLineNum);
      end;
   run;

   /* Without the column-heading line none of the macro variables above exist
      and the remaining steps cannot work; stop with a clear message. */
   %if %length(&HeaderLineNum.) = 0 %then %do;
      %put ERROR: ReadInLinkPlusReport found no line starting with "Linkage ID" in the first 50 records of &LinkRptPathIn..;
      %put ERROR: &MetadataDsnOut. and &ReportDsnOut. were not created.;
      %return;
   %end;

   /* Create a dataset containing just the metadata header portion of a Link
      Plus report (records 1 through the column-heading line). */
   /* NOTE(review): this step reads at most 200 characters per record while
      the step above reads 300 - confirm whether that difference is intended. */
   data &MetadataDsnOut.;
      infile "&LinkRptPathIn." length=len dsd obs=&HeaderLineNum.;
      input dataline : $varying200. len @;
      /* putlog dataline _error_ _n_; */
   run;

   /* The report data starts on the record after the column-heading line. */
   %let StartingObs=%eval(&HeaderLineNum.+1);

   /* Create a dataset containing the report data itself from a Link Plus
      report. Each observation is built from the second and third record of a
      three-record group. */
   data &ReportDsnOut.;
      infile "&LinkRptPathIn." dlm='09'x dsd firstobs=&StartingObs.;
      input #2 LinkageID1 SetOrLinkID1 RecordNum1 &input_1.
            #3 LinkageID2 SetOrLinkID2 RecordNum2 &input_2. class score;
      output;
   run;

   %if &Debug. = N %then %do;
      proc datasets lib=work nolist;
         delete __MetaDataHeader;
      run;
      quit;
   %end;

   %put --- End of %upcase(&sysmacroname) macro;

%mend ReadInLinkPlusReport;

/* Driver calls: read a report, modify it, and write the modified report.
   ModifyLinkPlusReport and AssembleLinkPlusReport are defined elsewhere. */
%ReadInLinkPlusReport(LinkRptPathIn=\\ofm\gwu\FC\SECURE\ERDC\data\ERDC_Staff\John\LinkPlus\P210\P210_0304\Report\Unmodified\P210_0304_20110321.txt, MetadataDsnOut=Metadata, ReportDsnOut=ReportData, Debug=Y)

%ModifyLinkPlusReport(ReportDsnIn=ReportData, ReportDsnOut=ReportData2, Debug=Y)

%AssembleLinkPlusReport(MetadataDsnIn=Metadata, ReportDsnIn=ReportData2, LinkRptPathOut=\\ofm\gwu\FC\SECURE\ERDC\data\ERDC_Staff\John\LinkPlus\P210\P210_0304\Report\Unmodified\P210_0304_20110321_temp.txt, Debug=Y)