# CsvDataExtractor.Tests.ps1
# Pester tests for the CSV helper functions ReadFirstLine, VerifyColumns and
# GetUrisFromCsv.
#
# The 'Unit' suite mocks ReadFirstLine / Import-Csv so no file is touched;
# the 'Integration' suite writes real files under Pester's $TestDrive.

BeforeAll {
    # Dot-source the scripts the tests rely on. The functions and exception
    # types used below are not defined in this file, so they are presumably
    # provided by these two scripts.
    . $PSScriptRoot\CsvDataExtractor.ps1
    . $PSScriptRoot\Exception.ps1
}

Describe 'CSV Data Extractor Unit Tests' -Tag 'Unit' {

    # Header validation: ReadFirstLine is mocked to return the header row, and
    # each test also checks that VerifyColumns actually consulted it.
    Context 'Verify CSV columns' {

        It 'Should not throw if no duplicate columns' {
            Mock ReadFirstLine { return "digitalObjectURI,a,b,c" }

            { VerifyColumns 'dummy.csv' } |
                Should -Not -Throw -ExceptionType ([CsvReadException])

            Should -Invoke -CommandName ReadFirstLine
        }

        It 'Should not throw if digitalObjectChecksums exists and -WithChecksums' {
            Mock ReadFirstLine { return "digitalObjectURI,digitalObjectChecksum" }

            { VerifyColumns 'dummy.csv' -WithChecksums } |
                Should -Not -Throw -ExceptionType ([CsvReadException])

            Should -Invoke -CommandName ReadFirstLine
        }

        It 'Should throw if column occurs twice' {
            Mock ReadFirstLine { return "digitalObjectURI,a,b,c,d,a" }

            { VerifyColumns 'dummy.csv' } |
                Should -Throw -ExceptionType ([CsvReadException])

            Should -Invoke -CommandName ReadFirstLine
        }

        It 'Should throw if multiple columns are duplicated' {
            Mock ReadFirstLine { return 'digitalObjectURI,a,b,c,d,a,b,c,d' }

            { VerifyColumns 'dummy.csv' } |
                Should -Throw -ExceptionType ([CsvReadException])

            Should -Invoke -CommandName ReadFirstLine
        }

        It 'Should strip " and whitespace from column names' {
            # The first and third names differ only by padding and quotes, so
            # a throw here shows they were treated as the same column.
            Mock ReadFirstLine { return 'digitalObjectURI ,col, "digitalObjectURI" ' }

            { VerifyColumns 'dummy.csv' } |
                Should -Throw -ExceptionType ([CsvReadException])

            Should -Invoke -CommandName ReadFirstLine
        }

        It 'Should throw if no digitalObjectURI column' {
            Mock ReadFirstLine { return 'a,b,c' }

            { VerifyColumns 'dummy.csv' } |
                Should -Throw -ExceptionType ([CsvReadException])

            Should -Invoke -CommandName ReadFirstLine
        }

        It 'Should throw if digitalObjectChecksums not present if -WithChecksums' {
            Mock ReadFirstLine { return 'digitalObjectURI,a,b,c' }

            { VerifyColumns 'dummy.csv' -WithChecksums } |
                Should -Throw -ExceptionType ([CsvReadException])

            Should -Invoke -CommandName ReadFirstLine
        }
    }

    # URI extraction: Import-Csv is mocked to return the rows directly.
    Context 'Load URIs from CSV' {

        BeforeAll {
            # Default header for every test in this context; individual tests
            # may override it.
            Mock ReadFirstLine { Return 'digitalObjectURI' }
        }

        It 'Should parse URIs from a CSV file with one column' {
            Mock Import-Csv {
                Return @(
                    [PSCustomObject] @{ digitalObjectURI = 'https://myatom.ca/object1.pdf' },
                    [PSCustomObject] @{ digitalObjectURI = 'https://myatom.ca/object2.pdf' },
                    [PSCustomObject] @{ digitalObjectURI = 'https://myatom.ca/object3.pdf' }
                )
            }

            # Cast to an array so .Count and indexing behave the same even if
            # only a single object comes back.
            $Uris = [Object[]] (GetUrisFromCsv -CsvFile 'dummy.csv')

            $Uris.Count | Should -BeExactly 3
            $Uris[0].Uri.Host | Should -BeExactly 'myatom.ca'
            $Uris[0].Uri.PathAndQuery | Should -BeExactly '/object1.pdf'
            $Uris[1].Uri.Host | Should -BeExactly 'myatom.ca'
            $Uris[1].Uri.PathAndQuery | Should -BeExactly '/object2.pdf'
            $Uris[2].Uri.Host | Should -BeExactly 'myatom.ca'
            $Uris[2].Uri.PathAndQuery | Should -BeExactly '/object3.pdf'
        }

        It 'Should parse URIs from a CSV with multiple columns' {
            # Override mock
            Mock ReadFirstLine { Return 'digitalObjectURI,other' }
            Mock Import-Csv {
                Return @(
                    [PSCustomObject] @{ digitalObjectURI = 'https://myatom.ca/object1.pdf'; other = 'X' },
                    [PSCustomObject] @{ digitalObjectURI = 'https://myatom.ca/object2.pdf'; other = 'Y' },
                    [PSCustomObject] @{ digitalObjectURI = 'https://myatom.ca/object3.pdf'; other = 'Z' }
                )
            }

            $Uris = [Object[]] (GetUrisFromCsv -CsvFile 'dummy.csv')

            $Uris.Count | Should -BeExactly 3
            $Uris[0].Uri.Host | Should -BeExactly 'myatom.ca'
            $Uris[0].Uri.PathAndQuery | Should -BeExactly '/object1.pdf'
            $Uris[1].Uri.Host | Should -BeExactly 'myatom.ca'
            $Uris[1].Uri.PathAndQuery | Should -BeExactly '/object2.pdf'
            $Uris[2].Uri.Host | Should -BeExactly 'myatom.ca'
            $Uris[2].Uri.PathAndQuery | Should -BeExactly '/object3.pdf'
        }

        It 'Should ignore empty cells' {
            Mock Import-Csv {
                Return @(
                    [PSCustomObject] @{ digitalObjectURI = '' },
                    [PSCustomObject] @{ digitalObjectURI = 'https://myatom.ca/object2.pdf' },
                    [PSCustomObject] @{ digitalObjectURI = '' }
                )
            }

            $Uris = [Object[]] (GetUrisFromCsv -CsvFile 'dummy.csv')

            $Uris.Count | Should -BeExactly 1
            $Uris[0].Uri.Host | Should -BeExactly 'myatom.ca'
            $Uris[0].Uri.PathAndQuery | Should -BeExactly '/object2.pdf'
        }

        It 'Should throw if multiple domains found' {
            Mock Import-Csv {
                Return @(
                    [PSCustomObject] @{ digitalObjectURI = 'https://myatom.ca/object1.pdf' },
                    [PSCustomObject] @{ digitalObjectURI = 'https://youratom.ca/object2.pdf' },
                    [PSCustomObject] @{ digitalObjectURI = 'https://ouratom.ca/object3.pdf' }
                )
            }

            { GetUrisFromCsv -CsvFile 'dummy.csv' } |
                Should -Throw -ExceptionType ([MultipleDomainException])
        }

        It 'Should throw if invalid URI is found' {
            Mock Import-Csv {
                Return @(
                    [PSCustomObject] @{ digitalObjectURI = 'https://myatom.ca/object1.pdf' },
                    [PSCustomObject] @{ digitalObjectURI = 'garbage' }
                )
            }

            { GetUrisFromCsv -CsvFile 'dummy.csv' } |
                Should -Throw -ExceptionType ([UriLoadException])
        }

        It 'Should throw if no path and query exist' {
            Mock Import-Csv {
                Return @(
                    [PSCustomObject] @{ digitalObjectURI = 'https://myatom.ca/' }
                )
            }

            { GetUrisFromCsv -CsvFile 'dummy.csv' } |
                Should -Throw -ExceptionType ([UriLoadException])
        }

        It 'Should throw if there are no URIs in the file' {
            Mock Import-Csv {
                Return @(
                    [PSCustomObject] @{ digitalObjectURI = '' },
                    [PSCustomObject] @{ digitalObjectURI = '' },
                    [PSCustomObject] @{ digitalObjectURI = '' }
                )
            }

            { GetUrisFromCsv -CsvFile 'dummy.csv' } |
                Should -Throw -ExceptionType ([UriLoadException])
        }
    }
}

Describe 'CSV Data Extractor Integration Tests' -Tag 'Integration' {

    # ReadFirstLine against real files written to $TestDrive.
    Context 'Read first line' {

        BeforeAll {
            $TestFile = Join-Path -Path $TestDrive -ChildPath 'test.csv'
        }

        It 'Should read first line if multiple lines exist' {
            Set-Content -Path $TestFile -Value "hello`nworld`ntest`ncase"

            ReadFirstLine $TestFile | Should -BeExactly 'hello'
        }

        It 'Should return empty string if file is empty' {
            Set-Content -Path $TestFile -Value ''

            ReadFirstLine $TestFile | Should -BeExactly ''
        }

        It 'Should return first line if no newlines' {
            Set-Content -Path $TestFile -Value 'hello'

            ReadFirstLine $TestFile | Should -BeExactly 'hello'
        }
    }

    # Same scenarios as the unit-level column checks, but VerifyColumns reads
    # the header from a real CSV file instead of a mocked ReadFirstLine.
    Context 'Verify CSV columns' {

        BeforeAll {
            $TestCsv = Join-Path -Path $TestDrive -ChildPath 'test.csv'
        }

        It 'Should not throw if no duplicate columns' {
            $Lines = @(
                'digitalObjectURI,a,b,c',
                '0,1,2,3'
            )
            Set-Content -Path $TestCsv -Value ($Lines -Join "`n")

            { VerifyColumns $TestCsv } |
                Should -Not -Throw -ExceptionType ([CsvReadException])
        }

        It 'Should not throw if digitalObjectChecksums exists and -WithChecksums' {
            $Lines = @(
                'digitalObjectURI,digitalObjectChecksum',
                '0,1'
            )
            Set-Content -Path $TestCsv -Value ($Lines -Join "`n")

            { VerifyColumns $TestCsv -WithChecksums } |
                Should -Not -Throw -ExceptionType ([CsvReadException])
        }

        It 'Should throw if column occurs twice' {
            $Lines = @(
                'digitalObjectURI,a,b,c,d,a',
                '0,1,2,3,4,5'
            )
            Set-Content -Path $TestCsv -Value ($Lines -Join "`n")

            { VerifyColumns $TestCsv } |
                Should -Throw -ExceptionType ([CsvReadException])
        }

        It 'Should throw if multiple columns are duplicated' {
            $Lines = @(
                'digitalObjectURI,a,b,c,d,a,b,c,d',
                '0,1,2,3,4,5,6,7,8'
            )
            Set-Content -Path $TestCsv -Value ($Lines -Join "`n")

            { VerifyColumns $TestCsv } |
                Should -Throw -ExceptionType ([CsvReadException])
        }

        It 'Should strip " and whitespace from column names' {
            $Lines = @(
                ' digitalObjectURI ,col2, "digitalObjectURI" ',
                '1,2,3'
            )
            Set-Content -Path $TestCsv -Value ($Lines -Join "`n")

            { VerifyColumns $TestCsv } |
                Should -Throw -ExceptionType ([CsvReadException])
        }

        It 'Should throw if no digitalObjectURI column' {
            $Lines = @(
                'a,b,c',
                '1,2,3'
            )
            Set-Content -Path $TestCsv -Value ($Lines -Join "`n")

            # Deliberately called without -WithChecksums (as in the unit
            # suite): with the switch, the missing checksum column could
            # satisfy this assertion on its own and hide a regression in the
            # digitalObjectURI check.
            { VerifyColumns $TestCsv } |
                Should -Throw -ExceptionType ([CsvReadException])
        }

        It 'Should throw if digitalObjectChecksums not present if -WithChecksums' {
            $Lines = @(
                'digitalObjectURI',
                '0'
            )
            Set-Content -Path $TestCsv -Value ($Lines -Join "`n")

            { VerifyColumns $TestCsv -WithChecksums } |
                Should -Throw -ExceptionType ([CsvReadException])
        }
    }
}