Check-Csv.ps1
|
<#PSScriptInfo

.VERSION 0.1.0

.GUID 4e8a6d76-4b54-4f63-8d35-5d9f8427d1f3

.AUTHOR Repository Maintainer

.COMPANYNAME

.COPYRIGHT (c) 2026 Repository Maintainer. All rights reserved.

.TAGS CSV Validation Encoding Delimiter LineEndings Import Quality

.LICENSEURI https://opensource.org/licenses/MIT

.PROJECTURI https://github.com/JoergBrors/Check-CSV

.DESCRIPTION Prüft CSV-Dateien in einem Verzeichnis auf Encoding, Delimiter, Header, Zeilenenden und Import-Kompatibilität.

.RELEASENOTES
0.1.0 – Initiale GitHub- und PowerShell-Gallery-fähige Version auf Basis des bereitgestellten Skripts.

#>

<#
.SYNOPSIS
Checks the CSV files in a directory for format and import properties.

.DESCRIPTION
The script inspects the files in a directory whose extension is in the
accepted list. For each file it determines, among other things, encoding,
code page, BOM, line endings, header, delimiter, record count and a simple
import-compatibility rating. In addition, a combined JSON report is written
to the output stream.

.PARAMETER Path
Directory to check. Defaults to the current directory.

.PARAMETER Extensions
File extensions to analyse, with or without a leading dot (for example
".csv" or "txt"). Defaults to ".csv".

.EXAMPLE
.\Check-Csv.ps1

.EXAMPLE
.\Check-Csv.ps1 -Path C:\Temp\CsvFiles

.EXAMPLE
.\Check-Csv.ps1 -Path C:\Temp\CsvFiles -Extensions csv, txt
#>

#Requires -Version 5.1

[CmdletBinding()]
param(
    [Parameter(Mandatory = $false)]
    [string]$Path = ".",

    [Parameter(Mandatory = $false, HelpMessage = 'Liste von Dateierweiterungen (z.B. ".csv", "txt"). Standard: .csv')]
    [string[]] $Extensions = @('.csv')
)

Set-StrictMode -Version 2.0
$ErrorActionPreference = "Stop"

function ConvertTo-NormalizedExtensionList {
    <#
    .SYNOPSIS
    Normalizes a list of file extensions to lower case with a leading dot.
    .DESCRIPTION
    Null, empty and whitespace-only entries are dropped. The result may be
    empty; callers decide on the fallback.
    .PARAMETER Extensions
    Raw extension list, e.g. "csv", ".TXT".
    .OUTPUTS
    System.String. One normalized extension per output item.
    #>
    [CmdletBinding()]
    param(
        [AllowNull()]
        [AllowEmptyCollection()]
        [AllowEmptyString()]
        [string[]]$Extensions
    )

    $list = New-Object 'System.Collections.Generic.List[string]'

    foreach ($entry in @($Extensions)) {
        if ([string]::IsNullOrWhiteSpace($entry)) {
            continue
        }

        $ext = $entry.Trim()
        if (-not $ext.StartsWith('.')) {
            $ext = '.' + $ext
        }

        $list.Add($ext.ToLowerInvariant())
    }

    $list.ToArray()
}

function Get-FileEncodingInfo {
    <#
    .SYNOPSIS
    Detects the text encoding of a file from its BOM or, failing that, by
    attempting a strict UTF-8 decode.
    .PARAMETER FilePath
    Path of the file to inspect. The whole file is read into memory.
    .OUTPUTS
    PSCustomObject with EncodingName, EncodingCode, HasBom, DecodeEncoding,
    ByteLength, ContainsNull and IsBinaryLike.
    #>
    [CmdletBinding()]
    param(
        [Parameter(Mandatory = $true)]
        [string]$FilePath
    )

    $bytes = [System.IO.File]::ReadAllBytes($FilePath)

    $result = [ordered]@{
        EncodingName   = "Unknown"
        EncodingCode   = $null
        HasBom         = $false
        DecodeEncoding = $null
        ByteLength     = $bytes.Length
        ContainsNull   = $false
        IsBinaryLike   = $false
    }

    # A raw 0x00 byte is treated as a sign of binary content. This is revised
    # below for UTF-16/UTF-32, where NUL bytes are a normal part of the text.
    if ([System.Array]::IndexOf($bytes, [byte]0) -ge 0) {
        $result.ContainsNull = $true
        $result.IsBinaryLike = $true
    }

    # UTF-32 must be tested before UTF-16: the UTF-32 LE BOM (FF FE 00 00)
    # starts with the UTF-16 LE BOM (FF FE).
    if ($bytes.Length -ge 4) {
        if ($bytes[0] -eq 0xFF -and $bytes[1] -eq 0xFE -and $bytes[2] -eq 0x00 -and $bytes[3] -eq 0x00) {
            $result.EncodingName = "UTF-32 LE"
            $result.EncodingCode = 12000
            $result.HasBom = $true
            $result.IsBinaryLike = $false
            $result.DecodeEncoding = [System.Text.Encoding]::UTF32
            return [pscustomobject]$result
        }

        if ($bytes[0] -eq 0x00 -and $bytes[1] -eq 0x00 -and $bytes[2] -eq 0xFE -and $bytes[3] -eq 0xFF) {
            $result.EncodingName = "UTF-32 BE"
            $result.EncodingCode = 12001
            $result.HasBom = $true
            $result.IsBinaryLike = $false
            # UTF32Encoding(bigEndian, byteOrderMark)
            $result.DecodeEncoding = New-Object System.Text.UTF32Encoding($true, $true)
            return [pscustomobject]$result
        }
    }

    if ($bytes.Length -ge 3) {
        if ($bytes[0] -eq 0xEF -and $bytes[1] -eq 0xBB -and $bytes[2] -eq 0xBF) {
            $result.EncodingName = "UTF-8 with BOM"
            $result.EncodingCode = 65001
            $result.HasBom = $true
            $result.DecodeEncoding = New-Object System.Text.UTF8Encoding($true)
            return [pscustomobject]$result
        }
    }

    if ($bytes.Length -ge 2) {
        if ($bytes[0] -eq 0xFF -and $bytes[1] -eq 0xFE) {
            $result.EncodingName = "UTF-16 LE"
            $result.EncodingCode = 1200
            $result.HasBom = $true
            # ASCII text in UTF-16 has a 0x00 in every code unit; not binary.
            $result.IsBinaryLike = $false
            $result.DecodeEncoding = [System.Text.Encoding]::Unicode
            return [pscustomobject]$result
        }

        if ($bytes[0] -eq 0xFE -and $bytes[1] -eq 0xFF) {
            $result.EncodingName = "UTF-16 BE"
            $result.EncodingCode = 1201
            $result.HasBom = $true
            $result.IsBinaryLike = $false
            $result.DecodeEncoding = [System.Text.Encoding]::BigEndianUnicode
            return [pscustomobject]$result
        }
    }

    try {
        # throwOnInvalidBytes = $true: any invalid sequence raises, which
        # sends us to the Windows-1252 fallback below.
        $utf8Strict = New-Object System.Text.UTF8Encoding($false, $true)
        [void]$utf8Strict.GetString($bytes)

        $result.EncodingName = "UTF-8 without BOM"
        $result.EncodingCode = 65001
        $result.HasBom = $false
        $result.DecodeEncoding = New-Object System.Text.UTF8Encoding($false)
        return [pscustomobject]$result
    }
    catch {
        $result.EncodingName = "ANSI / Windows-1252"
        $result.EncodingCode = 1252
        $result.HasBom = $false
        $result.DecodeEncoding = [System.Text.Encoding]::GetEncoding(1252)
        return [pscustomobject]$result
    }
}

function Get-LineEndingInfo {
    <#
    .SYNOPSIS
    Counts CRLF, lone LF and lone CR line endings in file content.
    .PARAMETER Bytes
    Raw file content. May be empty.
    .PARAMETER Encoding
    Optional. When given, the bytes are decoded first and line endings are
    counted on characters. This is required for UTF-16/UTF-32, where a CRLF is
    not the byte pair 0D 0A. When omitted, the raw bytes are scanned.
    .OUTPUTS
    PSCustomObject with LineEndingStyle (None, CRLF, LF, CR or Mixed),
    CRLFCount, LFCount, CRCount and IsMixed.
    #>
    [CmdletBinding()]
    param(
        [Parameter(Mandatory = $true)]
        [AllowEmptyCollection()]
        [byte[]]$Bytes,

        [Parameter(Mandatory = $false)]
        [System.Text.Encoding]$Encoding = $null
    )

    # Pick the sequence to scan and type the CR/LF constants to match its
    # element type, so -eq never relies on implicit conversion.
    if ($null -ne $Encoding) {
        $units = $Encoding.GetString($Bytes).ToCharArray()
        $cr = [char]13
        $lf = [char]10
    }
    else {
        $units = $Bytes
        $cr = [byte]13
        $lf = [byte]10
    }

    $unitCount = $units.Length
    $crlfCount = 0
    $lfCount = 0
    $crCount = 0

    $i = 0
    while ($i -lt $unitCount) {
        $current = $units[$i]

        if ($current -eq $cr) {
            if (($i + 1) -lt $unitCount -and $units[$i + 1] -eq $lf) {
                # CR immediately followed by LF counts once, as CRLF.
                $crlfCount++
                $i += 2
                continue
            }

            $crCount++
        }
        elseif ($current -eq $lf) {
            $lfCount++
        }

        $i++
    }

    $styleCount = 0
    if ($crlfCount -gt 0) { $styleCount++ }
    if ($lfCount -gt 0) { $styleCount++ }
    if ($crCount -gt 0) { $styleCount++ }

    $detected = "None"
    if ($styleCount -gt 1) {
        $detected = "Mixed"
    }
    elseif ($crlfCount -gt 0) {
        $detected = "CRLF"
    }
    elseif ($lfCount -gt 0) {
        $detected = "LF"
    }
    elseif ($crCount -gt 0) {
        $detected = "CR"
    }

    [pscustomobject]@{
        LineEndingStyle = $detected
        CRLFCount       = $crlfCount
        LFCount         = $lfCount
        CRCount         = $crCount
        IsMixed         = ($styleCount -gt 1)
    }
}

function Get-CsvDelimiter {
    <#
    .SYNOPSIS
    Guesses the delimiter from the header line by counting candidates.
    .DESCRIPTION
    Semicolon wins ties against comma and tab; tab wins ties against comma;
    comma is the fallback when no candidate occurs at all.
    .PARAMETER HeaderLine
    The header line of the CSV file.
    .OUTPUTS
    System.String. One of ";", tab or ",".
    #>
    [CmdletBinding()]
    param(
        [Parameter(Mandatory = $true)]
        [string]$HeaderLine
    )

    $semicolonCount = ([regex]::Matches($HeaderLine, ";")).Count
    $commaCount = ([regex]::Matches($HeaderLine, ",")).Count
    $tabCount = ([regex]::Matches($HeaderLine, "`t")).Count

    if ($semicolonCount -gt 0 -and $semicolonCount -ge $commaCount -and $semicolonCount -ge $tabCount) {
        return ";"
    }

    if ($tabCount -gt 0 -and $tabCount -ge $commaCount) {
        return "`t"
    }

    return ","
}

function Convert-FileToLines {
    <#
    .SYNOPSIS
    Reads a text file and splits it into lines on CRLF, CR or LF.
    .PARAMETER FilePath
    Path of the file to read.
    .PARAMETER Encoding
    Encoding used to decode the file.
    .OUTPUTS
    System.String. One item per line; nothing for an empty file. A trailing
    line terminator yields a final empty string.
    #>
    [CmdletBinding()]
    param(
        [Parameter(Mandatory = $true)]
        [string]$FilePath,

        [Parameter(Mandatory = $true)]
        [System.Text.Encoding]$Encoding
    )

    $text = [System.IO.File]::ReadAllText($FilePath, $Encoding)

    if ($null -eq $text -or $text.Length -eq 0) {
        return @()
    }

    # Normalize every line-ending style to LF before splitting.
    $text = $text -replace "`r`n", "`n"
    $text = $text -replace "`r", "`n"

    return @($text -split "`n")
}

function Normalize-HeaderValues {
    <#
    .SYNOPSIS
    Trims header cells and strips one pair of enclosing double quotes.
    .PARAMETER HeaderValues
    Raw header cells. Empty cells are allowed (e.g. from "a;;b" or a trailing
    delimiter) and are returned as empty strings.
    .OUTPUTS
    System.String. One cleaned value per input cell.
    #>
    [CmdletBinding()]
    param(
        [Parameter(Mandatory = $true)]
        [AllowEmptyCollection()]
        [AllowEmptyString()]
        [string[]]$HeaderValues
    )

    $normalized = New-Object 'System.Collections.Generic.List[string]'

    foreach ($value in $HeaderValues) {
        # The cast turns a null element into an empty string.
        $clean = ([string]$value).Trim()

        if ($clean.Length -ge 2 -and $clean.StartsWith('"') -and $clean.EndsWith('"')) {
            $clean = $clean.Substring(1, $clean.Length - 2)
        }

        $normalized.Add($clean)
    }

    $normalized.ToArray()
}

function Get-FirstNonEmptyLine {
    <#
    .SYNOPSIS
    Returns the first line that is neither null nor whitespace-only.
    .PARAMETER Lines
    Candidate lines; may be null or empty.
    .OUTPUTS
    System.String, or $null when no such line exists.
    #>
    [CmdletBinding()]
    param(
        [AllowNull()]
        [AllowEmptyCollection()]
        [object[]]$Lines
    )

    if ($null -eq $Lines) {
        return $null
    }

    foreach ($line in @($Lines)) {
        if ($null -ne $line -and -not [string]::IsNullOrWhiteSpace([string]$line)) {
            return [string]$line
        }
    }

    $null
}

function Get-FileFormatAssessment {
    <#
    .SYNOPSIS
    Derives an import-compatibility rating from encoding, line-ending and
    record-count findings.
    .PARAMETER EncodingInfo
    Result object of Get-FileEncodingInfo.
    .PARAMETER LineEndingInfo
    Result object of Get-LineEndingInfo.
    .PARAMETER DataRecordCount
    Number of data lines (excluding the header).
    .OUTPUTS
    PSCustomObject with ImportCompatibility (LikelyOK,
    CheckTargetRequirements or PotentiallyIncompatible), Issues and Warnings.
    #>
    [CmdletBinding()]
    param(
        [Parameter(Mandatory = $true)]
        [psobject]$EncodingInfo,

        [Parameter(Mandatory = $true)]
        [psobject]$LineEndingInfo,

        [Parameter(Mandatory = $true)]
        [int]$DataRecordCount
    )

    $issues = New-Object System.Collections.ArrayList
    $warnings = New-Object System.Collections.ArrayList

    if ($EncodingInfo.IsBinaryLike) {
        [void]$issues.Add("Null-Bytes gefunden; Datei wirkt binär oder ungeeignet für CSV-Import.")
    }

    if ($LineEndingInfo.LineEndingStyle -eq "CR") {
        [void]$issues.Add("Zeilenenden nur CR erkannt; viele Zielsysteme verarbeiten das nicht sauber.")
    }

    if ($LineEndingInfo.IsMixed) {
        [void]$issues.Add("Gemischte Zeilenenden erkannt; Import kann fehlschlagen.")
    }

    if ($EncodingInfo.EncodingName -eq "ANSI / Windows-1252") {
        [void]$warnings.Add("ANSI/Windows-1252 erkannt; Zielsystem muss dieses Encoding explizit unterstützen.")
    }

    if ($EncodingInfo.EncodingName -like "UTF-16*") {
        [void]$warnings.Add("UTF-16 erkannt; viele CSV-Importe erwarten stattdessen UTF-8 oder ANSI.")
    }

    if ($EncodingInfo.EncodingName -like "UTF-32*") {
        [void]$warnings.Add("UTF-32 erkannt; viele CSV-Importe erwarten stattdessen UTF-8 oder ANSI.")
    }

    if ($DataRecordCount -eq 0) {
        [void]$warnings.Add("Keine Nutzdatenzeilen gefunden.")
    }

    # Any issue outranks any warning.
    $compatibility = "LikelyOK"
    if ($issues.Count -gt 0) {
        $compatibility = "PotentiallyIncompatible"
    }
    elseif ($warnings.Count -gt 0) {
        $compatibility = "CheckTargetRequirements"
    }

    [pscustomobject]@{
        ImportCompatibility = $compatibility
        Issues              = $issues.ToArray()
        Warnings            = $warnings.ToArray()
    }
}

function Get-CsvValidationInfo {
    <#
    .SYNOPSIS
    Analyses a single file and returns its CSV format report.
    .DESCRIPTION
    Never throws for per-file problems: any exception is captured and
    reported through Status = "Error" and ErrorMessage.
    .PARAMETER FilePath
    Path of the file to analyse.
    .PARAMETER Extensions
    Extensions (with or without leading dot, any case) that are treated as
    CSV. Defaults to ".csv". Files with any other extension get
    Status = "NotCsv".
    .OUTPUTS
    PSCustomObject. Status is one of FileNotFound, NotCsv, EmptyFile,
    EmptyHeader, OK or Error.
    #>
    [CmdletBinding()]
    param(
        [Parameter(Mandatory = $true)]
        [string]$FilePath,

        [Parameter(Mandatory = $false)]
        [string[]]$Extensions = @('.csv')
    )

    $result = [ordered]@{
        FileName             = [System.IO.Path]::GetFileName($FilePath)
        FullPath             = $FilePath
        Extension            = [System.IO.Path]::GetExtension($FilePath)
        IsCsv                = $false
        Exists               = $false
        EncodingName         = $null
        EncodingCodePage     = $null
        HasBom               = $false
        FileSizeBytes        = 0
        ContainsNullBytes    = $false
        BinaryLike           = $false
        LineEndingStyle      = $null
        LineEndingDetails    = $null
        Delimiter            = $null
        Header               = @()
        HeaderWithInfo       = $null
        DataRecordCount      = 0
        FirstRecord          = $null
        First5RecordJsonList = @()
        ImportCompatibility  = $null
        FormatIssues         = @()
        FormatWarnings       = @()
        Status               = "Unknown"
        ErrorMessage         = $null
    }

    try {
        if (-not (Test-Path -LiteralPath $FilePath -PathType Leaf)) {
            $result.Status = "FileNotFound"
            return [pscustomobject]$result
        }

        $result.Exists = $true

        # Accept whatever the caller selected; fall back to .csv if the list
        # normalizes to nothing.
        $acceptedExtensions = @(ConvertTo-NormalizedExtensionList -Extensions $Extensions)
        if ($acceptedExtensions.Count -eq 0) {
            $acceptedExtensions = @('.csv')
        }

        $fileExtension = [System.IO.Path]::GetExtension($FilePath).ToLowerInvariant()
        if ($acceptedExtensions -notcontains $fileExtension) {
            $result.Status = "NotCsv"
            return [pscustomobject]$result
        }

        $result.IsCsv = $true

        $bytes = [System.IO.File]::ReadAllBytes($FilePath)
        $encodingInfo = Get-FileEncodingInfo -FilePath $FilePath
        # Count line endings on decoded characters so multi-byte encodings
        # (UTF-16/UTF-32) are not misread as CR + LF.
        $lineEndingInfo = Get-LineEndingInfo -Bytes $bytes -Encoding $encodingInfo.DecodeEncoding

        $result.EncodingName = $encodingInfo.EncodingName
        $result.EncodingCodePage = $encodingInfo.EncodingCode
        $result.HasBom = $encodingInfo.HasBom
        $result.FileSizeBytes = $encodingInfo.ByteLength
        $result.ContainsNullBytes = $encodingInfo.ContainsNull
        $result.BinaryLike = $encodingInfo.IsBinaryLike
        $result.LineEndingStyle = $lineEndingInfo.LineEndingStyle
        $result.LineEndingDetails = $lineEndingInfo

        $lines = @(Convert-FileToLines -FilePath $FilePath -Encoding $encodingInfo.DecodeEncoding)

        if ($lines.Count -eq 0) {
            $result.Status = "EmptyFile"
            $assessment = Get-FileFormatAssessment -EncodingInfo $encodingInfo -LineEndingInfo $lineEndingInfo -DataRecordCount 0
            $result.ImportCompatibility = $assessment.ImportCompatibility
            $result.FormatIssues = @($assessment.Issues)
            $result.FormatWarnings = @($assessment.Warnings)
            return [pscustomobject]$result
        }

        $headerLine = Get-FirstNonEmptyLine -Lines @($lines)

        if ([string]::IsNullOrWhiteSpace($headerLine)) {
            $result.Status = "EmptyHeader"
            $assessment = Get-FileFormatAssessment -EncodingInfo $encodingInfo -LineEndingInfo $lineEndingInfo -DataRecordCount 0
            $result.ImportCompatibility = $assessment.ImportCompatibility
            $result.FormatIssues = @($assessment.Issues)
            $result.FormatWarnings = @($assessment.Warnings)
            return [pscustomobject]$result
        }

        $delimiter = Get-CsvDelimiter -HeaderLine $headerLine
        $result.Delimiter = $delimiter

        # NOTE: plain split; a delimiter inside a quoted header cell is not
        # recognised as part of the cell.
        $rawHeader = @($headerLine -split [regex]::Escape($delimiter))
        # @() keeps a single-column header an array.
        $result.Header = @(Normalize-HeaderValues -HeaderValues $rawHeader)

        $result.HeaderWithInfo = "{0} | Encoding={1} | CodePage={2} | LineEnding={3}" -f (($result.Header -join ", ")), $result.EncodingName, $result.EncodingCodePage, $result.LineEndingStyle

        # Data records = non-empty lines minus the header line.
        $nonEmptyLines = @($lines | Where-Object { $null -ne $_ -and -not [string]::IsNullOrWhiteSpace([string]$_) })
        $dataLineCount = $nonEmptyLines.Count - 1
        if ($dataLineCount -lt 0) {
            $dataLineCount = 0
        }

        $result.DataRecordCount = $dataLineCount

        $text = [System.IO.File]::ReadAllText($FilePath, $encodingInfo.DecodeEncoding)

        if (-not [string]::IsNullOrWhiteSpace($text)) {
            $csvObjects = @($text | ConvertFrom-Csv -Delimiter $delimiter)

            if ($csvObjects.Count -gt 0) {
                $result.FirstRecord = $csvObjects[0]
                $result.First5RecordJsonList = @($csvObjects | Select-Object -First 5)
            }
        }

        $assessment = Get-FileFormatAssessment -EncodingInfo $encodingInfo -LineEndingInfo $lineEndingInfo -DataRecordCount $result.DataRecordCount
        $result.ImportCompatibility = $assessment.ImportCompatibility
        $result.FormatIssues = @($assessment.Issues)
        $result.FormatWarnings = @($assessment.Warnings)
        $result.Status = "OK"

        return [pscustomobject]$result
    }
    catch {
        $result.Status = "Error"
        $result.ErrorMessage = $_.Exception.Message
        return [pscustomobject]$result
    }
}

try {
    $resolvedPath = (Resolve-Path -LiteralPath $Path).Path

    # Normalize extensions to include a leading dot and lower case.
    $normalizedExtensions = @(ConvertTo-NormalizedExtensionList -Extensions $Extensions)

    if ($normalizedExtensions.Count -eq 0) {
        Write-Host "Keine gültigen Erweiterungen angegeben. Verwende .csv"
        $normalizedExtensions = @('.csv')
    }

    $files = @(
        Get-ChildItem -LiteralPath $resolvedPath -File |
            Where-Object { $normalizedExtensions -contains ([System.IO.Path]::GetExtension($_.Name).ToLowerInvariant()) } |
            Sort-Object Name
    )

    if ($files.Count -eq 0) {
        Write-Host "Keine Dateien im Verzeichnis gefunden: $resolvedPath"
    }
    else {
        # @() keeps a single result an array.
        $results = @(
            foreach ($file in $files) {
                Get-CsvValidationInfo -FilePath $file.FullName -Extensions $normalizedExtensions
            }
        )

        foreach ($item in $results) {
            Write-Host "------------------------------------------------------------"
            Write-Host ("Datei : {0}" -f $item.FileName)
            Write-Host ("Pfad : {0}" -f $item.FullPath)
            Write-Host ("CSV : {0}" -f $item.IsCsv)
            Write-Host ("Status : {0}" -f $item.Status)

            if ($item.IsCsv -and ($item.Status -eq "OK" -or $item.Status -eq "EmptyFile" -or $item.Status -eq "EmptyHeader")) {
                Write-Host ("Header : {0}" -f $item.HeaderWithInfo)
                Write-Host ("Delimiter : {0}" -f $item.Delimiter)
                Write-Host ("Datensaetze : {0}" -f $item.DataRecordCount)
                Write-Host ("Encoding : {0}" -f $item.EncodingName)
                Write-Host ("Codepage : {0}" -f $item.EncodingCodePage)
                Write-Host ("BOM : {0}" -f $item.HasBom)
                Write-Host ("LineEnding : {0}" -f $item.LineEndingStyle)
                Write-Host ("LineEndings Detail : CRLF={0}, LF={1}, CR={2}" -f $item.LineEndingDetails.CRLFCount, $item.LineEndingDetails.LFCount, $item.LineEndingDetails.CRCount)
                Write-Host ("Import-Kompat. : {0}" -f $item.ImportCompatibility)
                Write-Host ("Erster Datensatz : {0}" -f ($(if ($item.FirstRecord) { $item.FirstRecord | ConvertTo-Json -Depth 10 -Compress } else { $null })))

                if ($item.FormatIssues.Count -gt 0) {
                    Write-Host ("Issues : {0}" -f ($item.FormatIssues -join " | "))
                }

                if ($item.FormatWarnings.Count -gt 0) {
                    Write-Host ("Warnungen : {0}" -f ($item.FormatWarnings -join " | "))
                }
            }

            if ($item.ErrorMessage) {
                Write-Host ("Fehler : {0}" -f $item.ErrorMessage)
            }
        }

        Write-Host "============================================================"
        Write-Host "JSON-Gesamtausgabe:"

        $reportProperties = @(
            'FileName',
            'FullPath',
            'IsCsv',
            'EncodingName',
            'EncodingCodePage',
            'HasBom',
            'FileSizeBytes',
            'ContainsNullBytes',
            'BinaryLike',
            'LineEndingStyle',
            'LineEndingDetails',
            'HeaderWithInfo',
            'Delimiter',
            'DataRecordCount',
            'FirstRecord',
            'First5RecordJsonList',
            'ImportCompatibility',
            'FormatIssues',
            'FormatWarnings',
            'Status',
            'ErrorMessage'
        )

        $results |
            Select-Object -Property $reportProperties |
            ConvertTo-Json -Depth 10
    }
}
catch {
    Write-Error $_.Exception.Message
}