/*-----------------------------------------------------------------------------
File:          CreateLinkPlusFunctions.sas
Author:        John Sabel
               john.sabel@ofm.wa.gov
               Washington State Education Research & Data Center
Creation date: 1/31/12
Version:       0.91

Purpose
   This program creates the Link Plus specific functions listed below. When
   you run this program, the functions that are created are stored in
   SASUser.FCMP.LinkPlus. There is a dependency between this location and the
   program "ReadInLinkPlus.sas." The program "ReadInLinkPlus.sas" is coded to
   look for the function FixIdVariables() in SASUser.FCMP.LinkPlus. The
   programs, and the resulting dependency, are written to facilitate ease of
   first time use.

Link Plus specific functions:
   1) FixIdVariables(MetadataString $) $
         Used in "ReadInLinkPlus.sas."
   2) GetDateCategory(Date1, Date2)
         Returns 0 if same date, 1 if dates similar, 2 if dates dissimilar.
   3) GetLastNameCategory(LastName1 $, FirstName1 $, LastName2 $, FirstName2 $)
         Based on similarities of last names and their interactions with
         first names, returns values 0 (same) through 4 (dissimilar).
   4) GetFirstNameCategory(FirstName1 $, MiddleName1 $, FirstName2 $, MiddleName2 $)
         Based on similarities of first names and their interactions with
         middle names, returns values 0 (same) to 2 (dissimilar).
   5) HasABadBirthDate(Date1, Date2, LowYearCutoff, HighYearCutoff)
         Returns 1 if either date is missing, falls on January 1st, or has a
         year outside the given cutoffs; otherwise returns 0.

Revisions
   Date        Version  Author  Comments
   2012-07-06  0.91     JRS     Added GetDateCategory(), GetLastNameCategory(),
                                and GetFirstNameCategory().
-------------------------------------------------------------------------------
Copyright © 2012 Washington State Office of Financial Management

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.
-----------------------------------------------------------------------------*/

proc fcmp outlib = SASUser.FCMP.LinkPlus;

   /*--------------------------------------------------------------------------
   FixIdVariables(MetadataString $) $

   Takes a whitespace- or tab-delimited string of words and returns the string
   rebuilt word by word, adjusting "_1" / "_2" suffixes on the way.

     - If the string contains "_1": walks the words left to right. Each time a
       word containing "_1" is seen, every occurrence of that word that is
       preceded by the delimiter is rewritten in the working copy with "_2",
       so later repeats are emitted with the "_2" suffix.
     - Otherwise: walks the words right to left. Each time a word containing
       "_2" is seen, every occurrence of that word that is followed by the
       delimiter is rewritten with "_1", so earlier repeats are emitted with
       the "_1" suffix.

   The delimiter is a tab when the string contains one, otherwise a blank.
   The result is also written to the log (put ReturnString=).

   NOTE(review): SingleWord / SingleWord2 have no declared length, and the
   result is limited to 512 characters by the LENGTH statement - confirm these
   are adequate for the metadata lines passed in by ReadInLinkPlus.sas.
   --------------------------------------------------------------------------*/
   function FixIdVariables(MetadataString $) $;
      length ReturnString $512 MetaString $512;

      /* Need to create copy of MetadataString to avoid a WA RNING being thrown. */
      MetaString = strip(MetadataString);

      if indexc(MetaString, '09'x) then delimiter = '09'x;
      else delimiter = ' ';

      /* Number of words = number of white-space characters + 1. */
      NumWords = countc(MetaString, '', 'st') + 1;

      if index(MetaString, '_1') then do;
         /* Left-to-right pass: later repeats of a "_1" word become "_2". */
         do i = 1 to NumWords - 1;
            SingleWord = scanq(MetaString, i, delimiter);
            ReturnString = catx(delimiter, ReturnString, SingleWord);
            if prxmatch('/_1/', SingleWord) then do;
               SingleWord2 = prxchange('s/(\w+)_1/$1_2/', - 1, SingleWord) ;
               MetaString = tranwrd(MetaString, delimiter || trim(SingleWord), delimiter || trim(SingleWord2));
               *MetaString = tranwrd(MetaString, '09'x || SingleWord, '09'x || SingleWord2);
            end;
         end;
         /* The last word is appended after the loop so that it reflects any
            rewrite made while processing the earlier words. */
         ReturnString = catx(delimiter, ReturnString, scanq(MetaString, NumWords, delimiter));
      end;
      else do;
         /* Right-to-left pass: earlier repeats of a "_2" word become "_1". */
         do i = NumWords to 2 by -1;
            SingleWord = scanq(MetaString, i, delimiter);
            ReturnString = catx(delimiter, SingleWord, ReturnString);
            if prxmatch('/_2/', SingleWord) then do;
               SingleWord2 = prxchange('s/(\w+)_2/$1_1/', - 1, SingleWord) ;
               MetaString = tranwrd(MetaString, trim(SingleWord) || delimiter, trim(SingleWord2) || delimiter);
               *MetaString = tranwrd(MetaString, '09'x || SingleWord, '09'x || SingleWord2);
            end;
         end;
         /* The first word is prepended after the loop so that it reflects any
            rewrite made while processing the later words. */
         ReturnString = catx(delimiter, scanq(MetaString, 1, delimiter), ReturnString);
      end;

      put ReturnString=;
      return(ReturnString);
   endsub;

   /* The following functions are Link Plus specific. */

   /*--------------------------------------------------------------------------
   GetDateCategory(Date1, Date2)            Has 3 categories.

   Compares two SAS date values and returns
      0  the dates are equal,
      1  the dates are "similar" (see the rules below),
      2  otherwise.

   A pair is "similar" when exactly one of month / day / year differs and
   that difference looks like a keying error:
      - the differing part is within 2 of the other,
      - month or day digits are transposed (e.g. 12 vs 21, 01 vs 10),
      - month or day differ by exactly 10,
   or when month and day are swapped with each other in the same year, or
   when the earlier year is 1939 or before and day or year is within 3.

   All rules are symmetric: GetDateCategory(a, b) = GetDateCategory(b, a).
   --------------------------------------------------------------------------*/
   function GetDateCategory(Date1, Date2);
      day1   = day(Date1);     day2   = day(Date2);
      month1 = month(Date1);   month2 = month(Date2);
      year1  = year(Date1);    year2  = year(Date2);

      /* Order each date part so the tolerance rules do not depend on which
         date was passed first. */
      if month1 > month2 then do;
         month_large = month1;
         month_small = month2;
      end;
      else do;
         month_large = month2;
         month_small = month1;
      end;

      if day1 > day2 then do;
         day_large = day1;
         day_small = day2;
      end;
      else do;
         day_large = day2;
         day_small = day1;
      end;

      if year1 > year2 then do;
         year_large = year1;
         year_small = year2;
      end;
      else do;
         year_large = year2;
         year_small = year1;
      end;

      if Date1 = Date2 then GetDateCategory = 0;
      else if
         /* Within two months, days or years of one another */
         (month_small + 2 >= month_large AND day1 = day2 AND year1 = year2) OR
         (day_small + 2 >= day_large AND month1 = month2 AND year1 = year2) OR
         (year_small + 2 >= year_large AND month1 = month2 AND day1 = day2) OR
         /* Transposed month digits. z2. zero-pads single digits so that
            01 vs 10 is recognised; a blank-padded 2. format never matches
            because " 1" differs from "01". */
         (put(month1, z2.) = REVERSE(put(month2, z2.)) AND day1 = day2 AND year1 = year2) OR
         /* Transposed day digits */
         (put(day1, z2.) = REVERSE(put(day2, z2.)) AND month1 = month2 AND year1 = year2) OR
         /* Month or day +- exactly 10 from one another (either direction) */
         (month_small + 10 = month_large AND day1 = day2 AND year1 = year2) OR
         (day_small + 10 = day_large AND month1 = month2 AND year1 = year2) OR
         /* Transposed month and day */
         (day1 = month2 AND day2 = month1 AND year1 = year2) OR
         /* Fuzzy old dates: wider tolerance when the earlier year is 1939 or
            before, regardless of which argument carries it. */
         (
            year_small <= 1939 AND
            (
               (day_small + 3 >= day_large AND month1 = month2 AND year1 = year2) OR
               (year_small + 3 >= year_large AND month1 = month2 AND day1 = day2)
            )
         )
      then GetDateCategory = 1;
      else GetDateCategory = 2;

      return(GetDateCategory);
   endsub;

   /*--------------------------------------------------------------------------
   HasABadBirthDate(Date1, Date2, LowYearCutoff, HighYearCutoff)

   Returns 1 when either date is suspect, 0 otherwise. A date is suspect when
      - it is missing,
      - it falls on January 1st (always suspect),
      - its year is below LowYearCutoff or above HighYearCutoff.
   --------------------------------------------------------------------------*/
   function HasABadBirthDate(Date1, Date2, LowYearCutoff, HighYearCutoff);
      month1 = month(Date1);   month2 = month(Date2);
      day1   = day(Date1);     day2   = day(Date2);
      year1  = year(Date1);    year2  = year(Date2);
      /* put month1 day1 year1 month2 day2 year2;*/

      HasABadBirthDate = 0;

      if Date1 = . or Date2 = . then HasABadBirthDate = 1;

      /* January 1st birthdates, always suspect. */
      if month1 = 1 and day1 = 1 then HasABadBirthDate = 1;
      if month2 = 1 and day2 = 1 then HasABadBirthDate = 1;

      if year1 < LowYearCutoff or year2 < LowYearCutoff then HasABadBirthDate = 1;
      if year1 > HighYearCutoff or year2 > HighYearCutoff then HasABadBirthDate = 1;
      /* put LowYearCutoff HighYearCutoff;*/

      return(HasABadBirthDate);
   endsub;

   /*--------------------------------------------------------------------------
   GetLastNameCategory(LastName1 $, FirstName1 $, LastName2 $, FirstName2 $)
   Has 5 categories.

   Returns
      0  last names equal once blanks are removed, or a multi-word last name
         whose first and last words are swapped between the two records,
      1  HasWordInString(LastName1, LastName2) is true,
      2  last names share a SOUNDEX code (LastName1 not blank), or one is a
         prefix of the other and both are at least 3 characters long,
      3  last and first names appear swapped between the records (by SOUNDEX),
      4  otherwise.

   NOTE(review): HasWordInString() is not defined in this file; it must be
   available through the CMPLIB= search path when this step is compiled.
   --------------------------------------------------------------------------*/
   function GetLastNameCategory(LastName1 $, FirstName1 $, LastName2 $, FirstName2 $);
      /* Inverted two-word last names: first word of one equals last word of
         the other, in both directions. The comparison uses the function's
         parameters (LastName1 / LastName2). */
      LastNamesInverted = 0;
      if LastName1 ne LastName2
         and scan(LastName1, 1) = scan(LastName2, -1)
         and scan(LastName1, -1) = scan(LastName2, 1)
      then LastNamesInverted = 1;

      if compress(LastName1) = compress(LastName2) or LastNamesInverted then LastName_category = 0;
      else if HasWordInString(LastName1, LastName2) then LastName_category = 1;
      else if (soundex(LastName1) = soundex(LastName2) and strip(LastName1) ne '') or
              (strip(LastName1) =: strip(LastName2) and length(LastName1) >= 3 and length(LastName2) >= 3)
         then LastName_category = 2;
      else if (soundex(LastName1) = soundex(FirstName2)) and (soundex(LastName2) = soundex(FirstName1))
         then LastName_category = 3;
      else LastName_category = 4;

      return(LastName_category);
   endsub;

   /*--------------------------------------------------------------------------
   GetFirstNameCategory(FirstName1 $, MiddleName1 $, FirstName2 $, MiddleName2 $)
   Has 3 categories.

   Returns
      0  first names equal once blanks are removed, or first+middle names
         equal once concatenated and blanks are removed, or the pair is
         JOSH / JOSHUA,
      1  HasWordInString(FirstName1, FirstName2) is true, or the first names
         share a SOUNDEX code (FirstName1 not blank), or one is a prefix of
         the other and both are at least 3 characters long,
      2  otherwise.

   NOTE(review): HasWordInString() is not defined in this file; it must be
   available through the CMPLIB= search path when this step is compiled.
   --------------------------------------------------------------------------*/
   function GetFirstNameCategory(FirstName1 $, MiddleName1 $, FirstName2 $, MiddleName2 $);
      if (compress(FirstName1) = compress(FirstName2)) or
         compress(cats(FirstName1, MiddleName1)) = compress(cats(FirstName2, MiddleName2 )) or
         ((FirstName1 = 'JOSH' and FirstName2 = 'JOSHUA') or (FirstName1 = 'JOSHUA' and FirstName2 = 'JOSH'))
         then FirstName_category = 0;
      else if HasWordInString(FirstName1, FirstName2) or
              (soundex(FirstName1) = soundex(FirstName2) and strip(FirstName1) ne ' ') or
              (strip(FirstName1) =: strip(FirstName2) and length(FirstName1) >= 3 and length(FirstName2) >= 3)
         then FirstName_category = 1;
      else FirstName_category = 2;

      return(FirstName_category);
   endsub;

run;