ExampleScripts/Prepare-AddressesForImport.ps1

###############################
"Parse and Normalize Addresses"
###############################
$timestamp = Get-Date -Format "yyyy-MM-dd-HHmmss"
$output_folder  = "$HOME\Downloads\business-territory-$timestamp"
$google_doc_id = $env:Google_Doc_Id
"Parameters Set:"
" timestamp: $timestamp"
" output_folder: $output_folder"
" google_doc_id: $google_doc_id"

##################################
"Creating output folder: $output_folder"
##################################
mkdir -Path $output_folder 

########################################
"Downloading latest copy of Google document..."
########################################
$outfile = "$output_folder\00-downloaded.tsv"
$googleDocUri = "https://docs.google.com/spreadsheets/d/$google_doc_id/export?exportFormat=tsv"
Invoke-WebRequest -Uri $googleDocUri -OutFile $outfile
$infile = $outfile

###################
"Replacing first row..."
###################
$outfile = "$output_folder\01a-first-row-replaced.tsv"
$header = "Empty1`tEmpty2`tName`tAddress`tUnit`tCity`tState`tZip`tPhone`tComments"
$header > $outfile
Get-Content -Path $infile -Encoding UTF8 | Select -Skip 1 >> $outfile
$infile = $outfile

#####################
"Remove extra spaces..."
#####################
$outfile = "$output_folder\01b-remove-extra-spaces.tsv"
$e = "[ ][ ]+"
$r = " "
Get-Content -Path $infile -Encoding UTF8 | ForEach { "$($_ -replace $e, $r)" } > $outfile
$infile = $outfile

#####################################
"Split Address from City, State, Zip"
#####################################
$outfile = "$output_folder\02-split-address-1.tsv"
$e = "^\t\t" +
     "(?<name>[^\t]*)\t" +
     "(?<address>[a-zA-Z0-9 .#\-,/&]+),\s*" +
     "(?<city>[a-zA-Z0-9 .#\-]+),\s*" +
     "(?<state>WA),?\s+" +
     "(?<zip>\d\d\d\d\d)\s*\t" +
     "(?<phone>[^\t]*)\t" +
     "(?<comment>[^\t]*)"
$r = "`t`t`$1`t`$2`t`$3`tWA`t`$5`t`$6`t`$7"
Get-Content -Path $infile -Encoding UTF8 | ForEach { $_ -replace "$e", "$r" } > $outfile
$infile = $outfile

##############################################
"Split Unit Portion from Addresses With Units"
##############################################
$outfile = "$output_folder\03-split-unit.tsv"
$e = "^\t\t([^\t]*)\t" + 
    "\s*([^,\t]*)\s*,\s*([^,\t]*)\s*\t" + # Unit portion
    "([^\t]*)\t([^\t]*)\t([^\t]*)\t([^\t]*)\t([^\t]*)$"

$r = "`t`t`$1`t`$2`t`$3`t`$4`t`$5`t`$6`t`$7`t`$8"
Get-Content -Path $infile -Encoding UTF8 | % { $_ -replace "$e", "$r" } > $outfile
$infile = $outfile

############################################
"Add Blank Unit for Addresses Without Units"
############################################
$outfile = "$output_folder\04-blank-unit.tsv"
$e = "^\t\t([^\t]*)\t" + 
    "([^\t]*)\t" + # Unit portion
    "([^\t]*)\t([^\t]*)\t([^\t]*)\t([^\t]*)\t([^\t]*)$"

$r = "`t`t`$1`t`$2`t`t`$3`t`$4`t`$5`t`$6`t`$7"
Get-Content -Path $infile -Encoding UTF8 | % { $_ -replace "$e", "$r" } > $outfile
$infile = $outfile

###############################
"Normalize Phone Numbers"
###############################
$outfile = "$output_folder\04c-normalize-phone.tsv"
$e = "^(\t\t[^\t]*\t[^\t]*\t[^\t]*\t[^\t]*\t[^\t]*\t[^\t]*\t)[(]?(\d\d\d)[)-]\s*(\d\d\d)\s*-(\d\d\d\d)\s*(\t[^\t]*)$"
$r = "`$1`$2-`$3-`$4`$5"
#$r = "`$1`$5"
Get-Content -Path $infile -Encoding UTF8 | % { $_ -replace "$e", "$r" } > $outfile
$infile = $outfile

###############################
"Isolate Names"
###############################
$outfile = "$output_folder\10-isolate-names.tsv"
$e = "^[^\t]*\t[^\t]*\t([^\t]*)\t([^\t]*)\t([^\t]*)\t([^\t]*)\t([^\t]*)\t([^\t]*)\t([^\t]*)\t([^\t]*)$"
$r = "`$1"
Get-Content -Path $infile -Encoding UTF8 | % { $_ -replace "$e", "$r" } > $outfile

###############################
"Isolate Address"
###############################
$outfile = "$output_folder\11.0-isolate-address.txt"
$r = "`$2"
Get-Content -Path $infile -Encoding UTF8 | % { $_ -replace "$e", "$r" } > $outfile

#####################################################
"Remove Abbreviation Dots from Address"
"(Not needed if there is another normalization step)"
#####################################################
$outfile = "$output_folder\11.2-isolate-address-remove-dots.txt"
Get-Content -Path "$output_folder\11.0-isolate-address.txt" -Encoding UTF8 | % { $_.Replace(".", "") } > $outfile

#######################################################
"Normalize Addresses (only) with Territory Tools Alba Console"
#######################################################
$outFile = "$output_folder\11.3-isolate-address-normalize.txt" 
Get-Content -Path "$output_folder\11.2-isolate-address-remove-dots.txt" -Encoding UTF8 `
    | % { $_ `
          -replace "\bNorth\b", "N" -replace "\bSouth\b", "S" `
          -replace "\bEast\b", "E" -replace "\bWest\b", "W" `
          -replace "\bNortheast\b", "NE" -replace "\bNorthwest\b", "SW" `
          -replace "\bSouthwest\b", "NW" -replace "\bSoutheast\b", "SE" `
          -replace "\bStreet\b", "St" -replace "\bAvenue\b", "Ave" `
          -replace "\bDrive\b", "Dr" -replace "\bParkway\b", "Pkwy" `
          -replace "\bRoad\b", "Rd" -replace "\bHighway\b", "Hwy" `
        } `
    > $outfile

###############################
"Isolate Unit"
###############################
$outfile = "$output_folder\12-isolate-unit.tsv"
$r = "`$3"
Get-Content -Path $infile -Encoding UTF8 | % { $_ -replace "$e", "$r" } > $outfile

###############################
"Isolate City"
###############################
$outfile = "$output_folder\13-isolate-city.tsv"
$r = "`$4"
Get-Content -Path $infile -Encoding UTF8 | % { $_ -replace "$e", "$r" } > $outfile

###############################
"Isolate Zip"
###############################
$outfile = "$output_folder\14-isolate-zip.tsv"
$r = "`$6"
Get-Content -Path $infile -Encoding UTF8 | % { $_ -replace "$e", "$r" } > $outfile

###############################
"Isolate Phone"
###############################
$outfile = "$output_folder\15-isolate-phone.tsv"
$r = "`$7"
Get-Content -Path $infile -Encoding UTF8 | % { $_ -replace "$e", "$r" } > $outfile

###############################
"Isolate Comments"
###############################
$outfile = "$output_folder\16-isolate-comments.tsv"
$r = "`$8"
Get-Content -Path $infile -Encoding UTF8 | % { $_ -replace "$e", "$r" } > $outfile

########################
"Combine Isolated Files"
########################
$names = Get-Content -Path "$output_folder\10-isolate-names.tsv" -Encoding UTF8 
$addresses = Get-Content -Path "$output_folder\11.3-isolate-address-normalize.txt" -Encoding UTF8 
$units = Get-Content -Path "$output_folder\12-isolate-unit.tsv" -Encoding UTF8 
$cities = Get-Content -Path "$output_folder\13-isolate-city.tsv" -Encoding UTF8 
# skips states
$zips = Get-Content -Path "$output_folder\14-isolate-zip.tsv" -Encoding UTF8 
$phones = Get-Content -Path  "$output_folder\15-isolate-phone.tsv" -Encoding UTF8 
$comments = Get-Content -Path "$output_folder\16-isolate-comments.tsv" -Encoding UTF8 

#####################################
"Count Rows in Each File and Compare"
#####################################
$nameCount =$names.Length
$addresseCount =$addresses.Length
$unitCount =$units.Length
$cityCount = ($cities.Length)
#$stateCount = ($states.Length)
$zipCount = ($zips.Length)
$phoneCount = ($phones.Length)
$commentCount = ($comments.Length)

##################################################
"Print Summary (All row counts must be the same):"
##################################################
"Names: $nameCount"
"Addresses: $addresseCount"
"Units: $unitCount"
"Cities: $cityCount"
#"States: $stateCount"
"Zips: $zipCount"
"Phones: $phoneCount"
"Comments: $commentCount"

# We are skipping the state/province in this script
# -or $stateCount -ne $nameCount `

if($addresseCount    -ne $nameCount `
    -or $unitCount   -ne $nameCount `
    -or $cityCount   -ne $nameCount `
    -or $zipCount    -ne $nameCount `
    -or $phoneCount  -ne $nameCount `
    -or $commentCount-ne $nameCount) {
    throw "Counts do not match, cannot continue"
}

$outFile = "$output_folder\17-recombined.tsv"
Remove-Item -Path $outFile -Force -ErrorAction Ignore
For ($i = 0; $i -lt $names.Length; $i++) {
    "$($names[$i])`t$($addresses[$i])`t$($units[$i])`t$($cities[$i])`tWA`t$($zips[$i])`t$($phones[$i])`t$($comments[$i])" >> $outFile
}

#####################################################
"Combining Files into Export File in Alba TSV Format"
#####################################################
$outFile = "$output_folder\18.0-alba.tsv" 
$null > $outFile
For ($i = 0; $i -lt $names.Length; $i++) {
    "$($names[$i])`t$($units[$i])`t$($addresses[$i])`t$($cities[$i])`tWA`t$($zips[$i])`t`t`t`t$($phones[$i])`t$($comments[$i])" >> "$output_folder\18.0-alba.tsv"
}
$infile = $outfile

#####################################################
"Combining Files into Export File in Alba Console TSV Import Format"
#####################################################
$outFile = "$output_folder\18.1-alba-console-import.tsv" 
#$null > $outfile
$header = "Address_ID`tTerritory_ID`tLanguage`tStatus`tName`tSuite`tAddress`tCity`tProvince`tPostal_code`tCountry`tLatitude`tLongitude`tTelephone`tNotes`tNotes_private"
$header > $outfile
$territory_index = 0
$territory_ids = (101, 102, 103, 104, 105)
# In case you want to skip the headers
$starting_row = 1
For ($i = $starting_row; $i -lt $names.Length; $i++) {
    "`t$($territory_ids[$territory_index])`tChinese Mandarin`tNew`t$($names[$i])`t$($units[$i])`t$($addresses[$i])`t$($cities[$i])`tWA`t$($zips[$i])`tUSA`t`t`t$($phones[$i])`t$($comments[$i])`t2020-10-29: Imported for November 2020 Campaign" `
    >> $outFile
    if($territory_index -eq 4) {
        $territory_index = 0
    } else {
        $territory_index++
    }
}
$infile = $outfile

#"Replacing first row (header)..."
#$outFile = "$output_folder\18.2-replace-header.tsv"
#$header = "Address_ID`tTerritory_ID`tLanguage`tStatus`tName`tSuite`tAddress`tCity`tProvince`tPostal_code`tCountry`tLatitude`tLongitude`tTelephone`tNotes`tNotes_private"
#$header > $outfile
#Get-Content -Path $infile -Encoding UTF8 | Select -Skip 1 >> $outfile
#$infile = $outfile


#######################################################
#"Normalize Addresses with Territory Tools Normalizer
#######################################################
#$outFile = "$output_folder\19-normalized.tsv"
#Get-Content $infile | Get-NormalizedAddress | Out-File $outFile

"Done"