# CsvDataExtractor.Tests.ps1

# Dot-source the scripts that sit next to this test file so their definitions
# are available to every test below. Exception.ps1 presumably declares the
# exception types the assertions reference -- confirm against that file.
BeforeAll {
    . (Join-Path -Path $PSScriptRoot -ChildPath 'CsvDataExtractor.ps1')
    . (Join-Path -Path $PSScriptRoot -ChildPath 'Exception.ps1')
}

# Unit tests for VerifyColumns and GetUrisFromCsv.
# Every test passes the placeholder path 'dummy.csv' and replaces ReadFirstLine
# and/or Import-Csv with mocks that return canned data.
Describe 'CSV Data Extractor Unit Tests' -Tag 'Unit' {
    # Each test feeds VerifyColumns a header line through the mocked
    # ReadFirstLine and then confirms the mock was actually invoked.
    Context 'Verify CSV columns' {
        It 'Should not throw if no duplicate columns' {
            Mock ReadFirstLine { return "digitalObjectURI,a,b,c" }
            # -Not -Throw fails the test on any exception, so no type filter is needed.
            { VerifyColumns 'dummy.csv' } | Should -Not -Throw
            Should -Invoke ReadFirstLine
        }

        It 'Should not throw if digitalObjectChecksums exists and -WithChecksums' {
            Mock ReadFirstLine { return "digitalObjectURI,digitalObjectChecksum" }
            # -Not -Throw fails the test on any exception, so no type filter is needed.
            { VerifyColumns 'dummy.csv' -WithChecksums } | Should -Not -Throw
            Should -Invoke ReadFirstLine
        }

        It 'Should throw if column occurs twice' {
            # Column 'a' appears twice.
            Mock ReadFirstLine { return "digitalObjectURI,a,b,c,d,a" }
            { VerifyColumns 'dummy.csv' } | Should -Throw -ExceptionType ([CsvReadException])
            Should -Invoke ReadFirstLine
        }

        It 'Should throw if multiple columns are duplicated' {
            # Columns 'a', 'b', 'c' and 'd' each appear twice.
            Mock ReadFirstLine { return 'digitalObjectURI,a,b,c,d,a,b,c,d' }
            { VerifyColumns 'dummy.csv' } | Should -Throw -ExceptionType ([CsvReadException])
            Should -Invoke ReadFirstLine
        }

        It 'Should strip " and whitespace from column names' {
            # The first and last names only collide once the surrounding
            # whitespace and double quotes are removed, so a throw here shows
            # that the names were normalised before the duplicate check.
            Mock ReadFirstLine { return 'digitalObjectURI ,col, "digitalObjectURI" ' }
            { VerifyColumns 'dummy.csv' } | Should -Throw -ExceptionType ([CsvReadException])
            Should -Invoke ReadFirstLine
        }

        It 'Should throw if no digitalObjectURI column' {
            Mock ReadFirstLine { return 'a,b,c' }
            { VerifyColumns 'dummy.csv' } | Should -Throw -ExceptionType ([CsvReadException])
            Should -Invoke ReadFirstLine
        }

        It 'Should throw if digitalObjectChecksums not present if -WithChecksums' {
            Mock ReadFirstLine { return 'digitalObjectURI,a,b,c' }
            { VerifyColumns 'dummy.csv' -WithChecksums } | Should -Throw -ExceptionType ([CsvReadException])
            Should -Invoke ReadFirstLine
        }
    }

    # GetUrisFromCsv is exercised against rows returned by a mocked Import-Csv.
    Context 'Load URIs from CSV' {
        BeforeAll {
            # Default header for this context; presumably needed because
            # GetUrisFromCsv checks the header before loading rows -- confirm
            # against CsvDataExtractor.ps1.
            Mock ReadFirstLine { Return 'digitalObjectURI' }
        }

        It 'Should parse URIs from a CSV file with one column' {
            Mock Import-Csv { Return @(
                [PSCustomObject] @{ digitalObjectURI='https://myatom.ca/object1.pdf' },
                [PSCustomObject] @{ digitalObjectURI='https://myatom.ca/object2.pdf' },
                [PSCustomObject] @{ digitalObjectURI='https://myatom.ca/object3.pdf' }
            )}

            # The cast guarantees an array even if only one item comes back.
            $Uris = [Object[]] (GetUrisFromCsv -CsvFile 'dummy.csv')

            $Uris.Count | Should -BeExactly 3
            $Uris[0].Uri.Host | Should -BeExactly 'myatom.ca'
            $Uris[0].Uri.PathAndQuery | Should -BeExactly '/object1.pdf'
            $Uris[1].Uri.Host | Should -BeExactly 'myatom.ca'
            $Uris[1].Uri.PathAndQuery | Should -BeExactly '/object2.pdf'
            $Uris[2].Uri.Host | Should -BeExactly 'myatom.ca'
            $Uris[2].Uri.PathAndQuery | Should -BeExactly '/object3.pdf'
        }

        It 'Should parse URIs from a CSV with multiple columns' {
            Mock ReadFirstLine { Return 'digitalObjectURI,other' } # Override mock
            Mock Import-Csv { Return @(
                [PSCustomObject] @{ digitalObjectURI='https://myatom.ca/object1.pdf'; other='X' },
                [PSCustomObject] @{ digitalObjectURI='https://myatom.ca/object2.pdf'; other='Y' },
                [PSCustomObject] @{ digitalObjectURI='https://myatom.ca/object3.pdf'; other='Z' }
            )}
            $Uris = [Object[]] (GetUrisFromCsv -CsvFile 'dummy.csv')

            $Uris.Count | Should -BeExactly 3
            $Uris[0].Uri.Host | Should -BeExactly 'myatom.ca'
            $Uris[0].Uri.PathAndQuery | Should -BeExactly '/object1.pdf'
            $Uris[1].Uri.Host | Should -BeExactly 'myatom.ca'
            $Uris[1].Uri.PathAndQuery | Should -BeExactly '/object2.pdf'
            $Uris[2].Uri.Host | Should -BeExactly 'myatom.ca'
            $Uris[2].Uri.PathAndQuery | Should -BeExactly '/object3.pdf'
        }

        It 'Should ignore empty cells' {
            # Only the middle row carries a URI.
            Mock Import-Csv { Return @(
                [PSCustomObject] @{ digitalObjectURI='' },
                [PSCustomObject] @{ digitalObjectURI='https://myatom.ca/object2.pdf' },
                [PSCustomObject] @{ digitalObjectURI='' }
            )}
            $Uris = [Object[]] (GetUrisFromCsv -CsvFile 'dummy.csv')

            $Uris.Count | Should -BeExactly 1
            $Uris[0].Uri.Host | Should -BeExactly 'myatom.ca'
            $Uris[0].Uri.PathAndQuery | Should -BeExactly '/object2.pdf'
        }

        It 'Should throw if multiple domains found' {
            # Three rows, three different hosts.
            Mock Import-Csv { Return @(
                [PSCustomObject] @{ digitalObjectURI='https://myatom.ca/object1.pdf' },
                [PSCustomObject] @{ digitalObjectURI='https://youratom.ca/object2.pdf' },
                [PSCustomObject] @{ digitalObjectURI='https://ouratom.ca/object3.pdf' }
            )}

            { GetUrisFromCsv -CsvFile 'dummy.csv' } | Should -Throw -ExceptionType ([MultipleDomainException])
        }

        It 'Should throw if invalid URI is found' {
            # The second row is not a URI.
            Mock Import-Csv { Return @(
                [PSCustomObject] @{ digitalObjectURI='https://myatom.ca/object1.pdf' },
                [PSCustomObject] @{ digitalObjectURI='garbage' }
            )}

            { GetUrisFromCsv -CsvFile 'dummy.csv' } | Should -Throw -ExceptionType ([UriLoadException])
        }

        It 'Should throw if no path and query exist' {
            # The URI names a host but no object below the root path.
            Mock Import-Csv { Return @(
                [PSCustomObject] @{ digitalObjectURI='https://myatom.ca/' }
            )}

            { GetUrisFromCsv -CsvFile 'dummy.csv' } | Should -Throw -ExceptionType ([UriLoadException])
        }

        It 'Should throw if there are no URIs in the file' {
            # Every row has an empty digitalObjectURI cell.
            Mock Import-Csv { Return @(
                [PSCustomObject] @{ digitalObjectURI='' },
                [PSCustomObject] @{ digitalObjectURI='' },
                [PSCustomObject] @{ digitalObjectURI='' }
            )}

            { GetUrisFromCsv -CsvFile 'dummy.csv' } | Should -Throw -ExceptionType ([UriLoadException])
        }
    }
}


# Integration tests for ReadFirstLine and VerifyColumns.
# Nothing is mocked here: each test writes a real file under Pester's
# $TestDrive and runs the function against it.
Describe 'CSV Data Extractor Integration Tests' -Tag 'Integration' {
    Context 'Read first line' {
        BeforeAll {
            $TestFile = Join-Path -Path $TestDrive -ChildPath 'test.csv'
        }

        It 'Should read first line if multiple lines exist' {
            Set-Content -Path $TestFile -Value "hello`nworld`ntest`ncase"
            ReadFirstLine $TestFile | Should -BeExactly 'hello'
        }

        It 'Should return empty string if file is empty' {
            # NOTE(review): Set-Content without -NoNewline presumably still
            # writes a line terminator, so this file may hold one empty line
            # rather than zero bytes -- confirm this is the intended case.
            Set-Content -Path $TestFile -Value ''
            ReadFirstLine $TestFile | Should -BeExactly ''
        }

        It 'Should return first line if no newlines' {
            Set-Content -Path $TestFile -Value 'hello'
            ReadFirstLine $TestFile | Should -BeExactly 'hello'
        }
    }

    # Same scenarios as the unit-level 'Verify CSV columns' context, but the
    # header is read from an actual CSV file (a header row plus one data row).
    Context 'Verify CSV columns' {
        BeforeAll {
            $TestCsv = Join-Path -Path $TestDrive -ChildPath 'test.csv'
        }

        It 'Should not throw if no duplicate columns' {
            $Lines = @(
                'digitalObjectURI,a,b,c',
                '0,1,2,3'
            )
            Set-Content -Path $TestCsv -Value ($Lines -Join "`n")

            # -Not -Throw fails the test on any exception, so no type filter is needed.
            { VerifyColumns $TestCsv } | Should -Not -Throw
        }

        It 'Should not throw if digitalObjectChecksums exists and -WithChecksums' {
            $Lines = @(
                'digitalObjectURI,digitalObjectChecksum',
                '0,1'
            )
            Set-Content -Path $TestCsv -Value ($Lines -Join "`n")

            # -Not -Throw fails the test on any exception, so no type filter is needed.
            { VerifyColumns $TestCsv -WithChecksums } | Should -Not -Throw
        }

        It 'Should throw if column occurs twice' {
            # Column 'a' appears twice.
            $Lines = @(
                'digitalObjectURI,a,b,c,d,a',
                '0,1,2,3,4,5'
            )
            Set-Content -Path $TestCsv -Value ($Lines -Join "`n")

            { VerifyColumns $TestCsv } | Should -Throw -ExceptionType ([CsvReadException])
        }

        It 'Should throw if multiple columns are duplicated' {
            # Columns 'a', 'b', 'c' and 'd' each appear twice.
            $Lines = @(
                'digitalObjectURI,a,b,c,d,a,b,c,d',
                '0,1,2,3,4,5,6,7,8'
            )
            Set-Content -Path $TestCsv -Value ($Lines -Join "`n")

            { VerifyColumns $TestCsv } | Should -Throw -ExceptionType ([CsvReadException])
        }

        It 'Should strip " and whitespace from column names' {
            # The first and last names only collide once the surrounding
            # whitespace and double quotes are removed.
            $Lines = @(
                ' digitalObjectURI ,col2, "digitalObjectURI" ',
                '1,2,3'
            )
            Set-Content -Path $TestCsv -Value ($Lines -Join "`n")

            { VerifyColumns $TestCsv } | Should -Throw -ExceptionType ([CsvReadException])
        }

        It 'Should throw if no digitalObjectURI column' {
            $Lines = @(
                'a,b,c',
                '1,2,3'
            )
            Set-Content -Path $TestCsv -Value ($Lines -Join "`n")

            # Called without -WithChecksums on purpose: this header also lacks
            # the checksum column, so passing the switch would let the test
            # succeed even if the digitalObjectURI check itself were missing.
            { VerifyColumns $TestCsv } | Should -Throw -ExceptionType ([CsvReadException])
        }

        It 'Should throw if digitalObjectChecksums not present if -WithChecksums' {
            $Lines = @(
                'digitalObjectURI',
                '0'
            )
            Set-Content -Path $TestCsv -Value ($Lines -Join "`n")

            { VerifyColumns $TestCsv -WithChecksums } | Should -Throw -ExceptionType ([CsvReadException])
        }
    }
}