# CsvDataExtractor.ps1

. $PSScriptRoot\Exception.ps1

Function GetUrisFromCsv {
    <#
    .SYNOPSIS
    Collects the absolute URIs found in the digitalObjectURI column of a CSV file.

    .DESCRIPTION
    Runs CheckForDuplicateHeaders on the file, imports it as comma-delimited
    UTF-8, and walks every row. Cells that are missing, empty or whitespace-only
    are skipped. Every remaining cell must be a well-formed absolute URI with at
    least two path segments, otherwise a UriLoadException is thrown that names
    the offending line.

    .PARAMETER CsvFile
    Path to an existing CSV file. Validation throws a UriLoadException when the
    path does not exist or is not a file.

    .OUTPUTS
    System.Uri. One object per non-empty digitalObjectURI cell, in file order.
    A UriLoadException is thrown when no URI is found at all.
    #>
    Param(
        [Parameter(Mandatory=$True)]
        [ValidateScript({ If (Test-Path $_ -PathType Leaf -ErrorAction SilentlyContinue) {
            $True
        } Else {
            Throw [UriLoadException]::new("$_ does not exist or is not a file")
        }})]
        [String]
        $CsvFile
    )

    CheckForDuplicateHeaders $CsvFile
    $ImportedCsv = Import-Csv -Path $CsvFile -Delimiter ',' -Encoding 'UTF8'

    # Line 1 of the file is the header row, so the first record is reported as line 2.
    $LineNumber = 2
    $Uris = [System.Collections.ArrayList] @()
    ForEach ($Line in $ImportedCsv) {
        # Read the cell null-safely. The column may be absent, or the row may be
        # shorter than the header, in which case the value is $Null. Calling
        # .Trim() on $Null raises a statement-terminating error that leaves
        # $UriString holding the previous row's value, which would silently
        # duplicate that URI.
        $Property = $Line.PSObject.Properties['digitalObjectURI']
        $UriString = ''
        If ($Null -ne $Property) {
            # [String] $Null converts to the empty string.
            $UriString = ([String] $Property.Value).Trim()
        }

        If ($UriString) {
            If (-Not [System.Uri]::IsWellFormedUriString($UriString, [System.UriKind]::Absolute)) {
                $Msg = ("Cell '$UriString' in the digitalObjectURI column on line " +
                        "$LineNumber of the CSV is not a valid URI")
                Throw [UriLoadException]::new($Msg)
            }

            $NewUri = [System.Uri] $UriString

            # Fewer than two segments means there is nothing after the root "/".
            If ($NewUri.Segments.Length -lt 2) {
                $Msg = "URI on line $($LineNumber) does not appear to point to a file"
                Throw [UriLoadException]::new($Msg)
            }

            # ArrayList.Add returns the new index; discard it so it is not emitted as output.
            $Uris.Add($NewUri) | Out-Null
        }
        $LineNumber += 1
    }

    If ($Uris.Count -eq 0) {
        $Msg = "Could not find any URLs in the digitalObjectURI column of the CSV"
        Throw [UriLoadException]::new($Msg)
    }

    $Uris
}

Function CheckForDuplicateHeaders {
    <#
    .SYNOPSIS
    Throws a CsvReadException when the header row of a CSV repeats a column name.

    .DESCRIPTION
    Reads only the first line of the file, splits it on commas, trims each
    piece, drops empty pieces and groups the rest with Group-Object. Any group
    with more than one member is reported as a duplicate column. The split is
    a plain string split, so a quoted header that itself contains a comma is
    seen as several pieces. An empty file has no header row and passes.

    .PARAMETER CsvFile
    Path to the CSV file whose header row is inspected.
    #>
    Param(
        [Parameter(Mandatory=$True)]
        [String]
        $CsvFile
    )

    # Use ProviderPath: .NET cannot open PowerShell-only drive paths such as
    # "TestDrive:\file.csv", which is what the PathInfo object stringifies to.
    # -ErrorAction Stop surfaces an unresolvable path here instead of as an
    # obscure constructor failure below.
    $ResolvedCsv = (Resolve-Path $CsvFile -ErrorAction Stop).ProviderPath
    $FileReader = $Null
    Try {
        $FileReader = [System.IO.StreamReader]::new($ResolvedCsv)
        $FirstLine = $FileReader.ReadLine()

        # ReadLine returns $Null at end of stream, i.e. the file is empty.
        If ($Null -eq $FirstLine) {
            Return
        }

        $DirtyHeaders = $FirstLine.Split(',') | ForEach-Object { "$($_.Trim())" } | Where-Object { $_ }
        $GroupedHeaders = $DirtyHeaders | Group-Object
        ForEach ($Group in $GroupedHeaders) {
            If ($Group.Count -gt 1) {
                $Msg = "The column name '$($Group.Name)' appears more than once"
                Throw [CsvReadException]::new($Msg)
            }
        }
    }
    Catch [Exception] {
        # Re-throw so that every failure in the Try block, including errors
        # raised by the .NET calls, reaches the caller as a terminating error.
        Throw $_
    }
    Finally {
        # Always release the file handle, whether or not an error occurred.
        If ($Null -ne $FileReader) {
            $FileReader.Dispose()
            $FileReader = $Null
        }
    }
}