Public/Import-ExcelFile.ps1
|
function Import-ExcelFile { <# .SYNOPSIS Import Excel file (.xlsx, .xlsb, .xls) to PowerShell. .DESCRIPTION This function applies C# library `ExcelDataReader`. The data in Excel sheets are assumed to be "normal" e.g., start at first row and first column. It runs much faster, especially for files with few hundred thousand rows, than the `Import-Excel` function in `ImportExcel` PowerShell library although the latter has much more functionality and flexibility. The speed consideration is one of my motivation to write this function. .PARAMETER excel_path Path of your Excel (.xlsx or .xlsb or .xls) file. .PARAMETER sheet_names The sheets in Excel file you want to include. If more than one, store them in array e.g., @('Sheet1', 'Sheet2'). You can also use wildcard * to indicate you want to include all sheets .PARAMETER only_visible Optional switch. This switch will only be applied if sheet_names is *. -only_visible means all visible sheets in the Excel will be loaded. .PARAMETER return_array Optional switch. -return_array means data of a particular sheet will be saved in System.Array, instead of DataTable (Default). .INPUTS excel_path. .OUTPUTS An ordered dictionary (i.e., ordered hash table). Key = sheet name; Value = sheet data. Sheet data can be saved in DataTable (Default) or System.Array. .EXAMPLE #> [CmdletBinding()] param ( [ValidateNotNullorEmpty()] [Parameter(Mandatory=$true)] [string]$excel_path, [ValidateNotNullorEmpty()] [string[]]$sheet_names, [switch]$only_visible, [switch]$return_array ) $excel_path = (Resolve-Path $excel_path -ErrorAction Stop).Path if ([System.IO.Path]::GetExtension($excel_path) -notmatch '^\.xlsx$|^\.xlsb$|^\.xls$') { throw "$excel_path is not a xlsx, xlsb or xls file" } $mode = [System.IO.FileMode]::Open $access = [System.IO.FileAccess]::Read $file_stream = New-Object -TypeName System.IO.FileStream $excel_path, $mode, $access $excel_data_reader = [ExcelDataReader.ExcelReaderFactory]::CreateReader($file_stream) if ($only_visible) { $all_sheet_names = Get-ExcelSheetNames $excel_path -only_visible } else { $all_sheet_names = Get-ExcelSheetNames $excel_path } if ($sheet_names -eq '*') { $sheet_names = $all_sheet_names } elseif (-not $sheet_names) { $sheet_names = $all_sheet_names[0] } $sheet_index_array = @() foreach ($sheet_name in $sheet_names) { if ($all_sheet_names.Contains($sheet_name)) { $sheet_index_array += $all_sheet_names.IndexOf($sheet_name) } else { throw "$sheet_name is not found in the Excel workbook / $sheet_name is invisible but you select ``only visible`` option" } } function FilterSheetCallback { param ($reader, $sheet_index) return $sheet_index_array.Contains($sheet_index) } function ConfigureDataTableCallback { param ($reader) $data_table_conf = New-Object -TypeName ExcelDataReader.ExcelDataTableConfiguration $data_table_conf.UseHeaderRow = $true return $data_table_conf } $data_set_conf = New-Object -TypeName ExcelDataReader.ExcelDataSetConfiguration $data_set_conf.UseColumnDataType = $true $data_set_conf.FilterSheet = $Function:FilterSheetCallback $data_set_conf.ConfigureDataTable = $Function:ConfigureDataTableCallback $excel_data_set = [ExcelDataReader.ExcelDataReaderExtensions]::AsDataSet($excel_data_reader, $data_set_conf) $excel_data_reader.Dispose() $file_stream.Close() $file_stream.Dispose() $data_table_collections = $excel_data_set.Tables $selected_sheet_names = $data_table_collections.TableName $dict = [ordered]@{} foreach ($selected_sheet_name in $selected_sheet_names) { $selected_sheet_index = $selected_sheet_names.IndexOf($selected_sheet_name) if ($return_array) { $dict[$selected_sheet_name] = [System.Array]$data_table_collections[$selected_sheet_index] } else { $dict[$selected_sheet_name] = $data_table_collections[$selected_sheet_index] } } return $dict } |