public/Split-File.ps1
<# .SYNOPSIS Split a CSV or XLSX file based on values in a column .DESCRIPTION Split a CSV or XLSX file into one or more separate files based on values in a column. For example, if file named "Employees.xlsx" has a column named 'EmployeeType' contains values like 'EMP', 'CWR', 'TMP', this will create 3 new files based on the rows which have those corresponding values. In this case, the three new files would be "Employees_EMP.xlsx", "Employees_CWR.xlsx" and "Employees_TMP.xlsx". Each file would contain only the matching rows in the source file which have the corresponding value. .PARAMETER Path File to be parsed. .PARAMETER KeyColumn Name of column which will be analyzed. .PARAMETER KeyValues One or more values which will be used to parse out matching rows to separate files. If this parameter is blank (default) all unique values in the KeyColumn set will be used to determine the resulting set of output files. .PARAMETER WorksheetName Optional. For XLSX files only, identifies the tab (worksheet) to parse. This setting has no effect when used with CSV files. .PARAMETER OutputFolder Optional. Path where new files are to be created. The default is the same path as the source file. .EXAMPLE Split-File -Path "c:\temp\employees.xlsx" -WorksheetName "EmpData" -KeyColumn "EmpType" -KeyValues ("EMP","CWR") Creates c:\temp\employees_EMP.xlsx and c:\temp\employees_CWR.xlsx, if there are rows found where the "EmpType" column contains either "EMP" or "CWR". Rows with other values are ignored. .EXAMPLE Split-File -Path "c:\temp\products.csv" -KeyColumn "category" -OutputFolder c:\temp\files Creates one file for each unique value in the "category" column, and saves each new file to c:\temp\files with prefix "products_<value>.csv" where <value> is each unique value in "category". .NOTES Excel XLSX files require the PowerShell module ImportExcel .LINK https://github.com/Skatterbrainz/helium/blob/master/docs/Split-File.md #> function Split-File { [CmdletBinding()] param ( [parameter(Mandatory=$True)][string]$Path, [parameter(Mandatory=$True)][string]$KeyColumn, [parameter(Mandatory=$False)][string[]]$KeyValues, [parameter(Mandatory=$False)][string]$WorksheetName = "Sheet1", [parameter(Mandatory=$False)][string]$OutputFolder ) try { if (-not(Test-Path $Path)) { throw "File not found: $Path" } $file = Get-Item -Path $Path -ErrorAction Stop $filetype = $file.Extension $basename = $file.BaseName if ($filetype -eq '.xlsx') { if (!(Get-Module ImportExcel -ListAvailable)) { throw "Parsing Excel XLSX files requires the PowerShell module: ImportExcel" } } else { if ($filetype -ne '.csv') { throw "Only CSV and XLSX files are supported" } } if ($PSBoundParameters.ContainsKey('OutputFolder')) { if (-not(Test-Path $OutputFolder)) { throw "Output folder not found: $OutputFolder" } } else { $OutputFolder = $file.Parent } if ($filetype -eq '.csv') { $rows = Import-Csv -Path $Path -Encoding Utf8 -ErrorAction Stop } else { $rows = Import-Excel -Path $Path -WorksheetName $WorksheetName -ErrorAction Stop } Write-Host "$($rows.Count) rows imported from: $Path" $trows = 0 if ($PSBoundParameters.ContainsKey('KeyValues')) { if (($KeyValues.Count -eq 0) -or ([string]::IsNullOrWhiteSpace($KeyValues))) { $values = $rows."$KeyColumn" | Select-Object -Unique } else { $values = $KeyValues } } else { $values = $KeyValues } foreach ($v in $values) { $vrows = $rows | Where-Object {$_."$KeyColumn" -eq "$v"} if ($vrows.Count -gt 0) { $newfile = Join-Path $outputfolder "$($basename)_$($v)$($filetype)" if ($filetype -eq '.xlsx') { $vrows | Export-Excel -Path $newfile -WorksheetName $WorksheetName -ClearSheet } else { $vrows | Export-Csv -Path $newfile -Encoding Utf8 -Force } Write-Host "$($vrows.Count) rows exported to: $newfile" $trows += $vrows.Count } else { Write-Warning "No matching rows found: $($KeyColumn) = $($v)" } } Write-Host "$($rows.Count) total rows analyzed. $($trows) rows exported. $(($rows.Count) - $trows) not exported." } catch { Write-Error $_.Exception.Message } } |