encoding.ps1
# Private helper: maps the value returned by Get-FileEncoding (a
# [System.Text.Encoding] object or the string 'Unknown') to the encoding name
# used by the -Encoding / -AcceptableEncoding parameters in this file
# ('UTF8', 'ASCII', 'Unicode', ...), so it can be compared against strings.
function ConvertTo-FileEncodingName {
    param ( $Encoding )

    if ( $Encoding -is [System.Text.Encoding] ) {
        switch ( $Encoding.CodePage ) {
            65001 { return 'UTF8' }
            20127 { return 'ASCII' }
            1200  { return 'Unicode' }
            1201  { return 'BigEndianUnicode' }
            12000 { return 'UTF32' }
            12001 { return 'BigEndianUTF32' }
            65000 { return 'UTF7' }
        }
        # Code page without a dedicated name above: fall back to the .NET web name.
        return $Encoding.WebName
    }

    # Already a name (e.g. 'Unknown'), or $null which becomes ''.
    return [string]$Encoding
}

<#
.SYNOPSIS
Throws when any piped file has an encoding that is not in the accepted list.
.DESCRIPTION
Detects the encoding of every file with Get-FileEncoding (BOM based, so a
file without a BOM is reported as ASCII). Files whose encoding name is not
listed in -AcceptableEncoding are collected; in the end block they are written
to the output and a terminating error is thrown.
.PARAMETER FullName
Path of the file to check. Accepts pipeline input by value or by property name.
.PARAMETER AcceptableEncoding
Encoding names that are considered valid. Defaults to 'UTF8' and 'ASCII'.
#>
function Assert-FileEncoding {
    param (
        [Parameter(Mandatory = $True, ValueFromPipeline = $True, ValueFromPipelineByPropertyName = $true)]
        [string]$FullName,
        [string[]]$AcceptableEncoding = $('UTF8', 'ASCII')
    )

    begin {
        $failed = @()
    }

    process {
        # Compare by name: Get-FileEncoding returns an encoding object, which
        # would never be equal to the strings held in $AcceptableEncoding.
        $currentEncoding = ConvertTo-FileEncodingName ( Get-FileEncoding $FullName )
        if ( $AcceptableEncoding -notcontains $currentEncoding ) {
            $failed += $FullName
        }
    }

    end {
        if ( $failed ) {
            # Emit the offending paths before failing so the caller can see them.
            $failed
            throw "Files found with not acceptable $AcceptableEncoding "
        }
    }
}

<#
.SYNOPSIS
Rewrites a file in the requested encoding unless it already has an accepted one.
.DESCRIPTION
The detected encoding name (see Get-FileEncoding) is compared against
-Encoding plus -AcceptableEncoding. When it is in neither, the file content is
read with Get-Content and written back with Out-File -Encoding $Encoding.
.PARAMETER FullName
Path of the file to convert. Accepts pipeline input by value or by property name.
.PARAMETER Encoding
Target encoding name passed to Out-File. Defaults to 'UTF8'.
.PARAMETER AcceptableEncoding
Additional encoding names for which the file is left untouched.
.EXAMPLE
gci -fi common.ps1 -Recurse | Set-FileEncoding -Encoding 'ASCII'
#>
function Set-FileEncoding {
    param (
        [Parameter(Mandatory = $True, ValueFromPipeline = $True, ValueFromPipelineByPropertyName = $true)]
        [string]$FullName,
        [string]$Encoding = 'UTF8',
        [string[]]$AcceptableEncoding = @()
    )

    begin {
        # The target encoding is always acceptable; drop empty entries and duplicates.
        $AcceptableEncoding = @( $Encoding ) + $AcceptableEncoding |
            Where-Object { $_ } |
            Select-Object -Unique
    }

    process {
        # Compare by name, not against the encoding object itself.
        $currentEncoding = ConvertTo-FileEncodingName ( Get-FileEncoding $FullName )
        if ( $AcceptableEncoding -notcontains $currentEncoding ) {
            # Read everything first so the file is not open when it is overwritten.
            $content = Get-Content $FullName
            $content | Out-File $FullName -Encoding $Encoding
        }
    }
}

# http://franckrichard.blogspot.com/2010/08/powershell-get-encoding-file-type.html
<#
.DESCRIPTION
The Get-FileEncoding function determines encoding by looking at Byte Order Mark (BOM).
Based on port of C# code from http://www.west-wind.com/Weblog/posts/197245.aspx

Returns a [System.Text.Encoding] object for UTF-8, UTF-16 (LE/BE), UTF-32
(LE/BE) and UTF-7 BOMs, the string 'Unknown' for an empty file or for a BOM of
an encoding that has no .NET counterpart here (UTF-1, UTF-EBCDIC, SCSU, BOCU-1,
GB-18030), and [System.Text.Encoding]::ASCII when no BOM is recognised.
.PARAMETER FullName
Path of the file to inspect. Accepts pipeline input by value or by property name.
.EXAMPLE
gci -fi *.ps1 -Recurse | select fullname, @{ 'n' = 'Encoding'; 'e' = { Get-FileEncoding $_.Fullname } }
#>
function Get-FileEncoding {
    [CmdletBinding()]
    Param (
        [Parameter(Mandatory = $True, ValueFromPipeline = $True, ValueFromPipelineByPropertyName = $true)]
        [string]$FullName
    )

    process {
        # Only the first four bytes are needed to recognise every BOM below.
        # Indexing past the end of a shorter array yields $null, which never
        # equals a byte value, so short files simply fail the longer checks.
        [byte[]]$byte = Get-Content -AsByteStream -Path $FullName -ReadCount 4 -TotalCount 4

        if ( -not $byte.Count ) {
            # Empty file: use the same 'Unknown' marker as the other
            # undetectable cases rather than a provider enum type that may not
            # exist on the PowerShell versions that support -AsByteStream.
            return 'Unknown'
        }

        # EF BB BF (UTF8)
        if ( $byte[0] -eq 0xef -and $byte[1] -eq 0xbb -and $byte[2] -eq 0xbf ) {
            return [System.Text.Encoding]::UTF8
        }

        # 00 00 FE FF (UTF32 Big-Endian)
        if ( $byte[0] -eq 0 -and $byte[1] -eq 0 -and $byte[2] -eq 0xfe -and $byte[3] -eq 0xff ) {
            # [System.Text.Encoding]::UTF32 is little-endian; build the big-endian variant with a BOM.
            return [System.Text.UTF32Encoding]::new( $true, $true )
        }

        # FF FE 00 00 (UTF32 Little-Endian)
        # Must be tested before UTF-16 LE, whose BOM (FF FE) is a prefix of this one.
        if ( $byte[0] -eq 0xff -and $byte[1] -eq 0xfe -and $byte[2] -eq 0 -and $byte[3] -eq 0 ) {
            return [System.Text.Encoding]::UTF32
        }

        # FE FF (UTF-16 Big-Endian) 'Unicode UTF-16 Big-Endian'
        if ( $byte[0] -eq 0xfe -and $byte[1] -eq 0xff ) {
            return [System.Text.Encoding]::BigEndianUnicode
        }

        # FF FE (UTF-16 Little-Endian) 'Unicode UTF-16 Little-Endian'
        if ( $byte[0] -eq 0xff -and $byte[1] -eq 0xfe ) {
            return [System.Text.Encoding]::Unicode
        }

        # 2B 2F 76 (38 | 39 | 2B | 2F) (UTF7)
        if ( $byte[0] -eq 0x2b -and $byte[1] -eq 0x2f -and $byte[2] -eq 0x76 -and
            ( $byte[3] -eq 0x38 -or $byte[3] -eq 0x39 -or $byte[3] -eq 0x2b -or $byte[3] -eq 0x2f ) ) {
            return [System.Text.Encoding]::UTF7
        }

        # F7 64 4C (UTF-1)
        if ( $byte[0] -eq 0xf7 -and $byte[1] -eq 0x64 -and $byte[2] -eq 0x4c ) {
            return 'Unknown'
        }

        # DD 73 66 73 (UTF-EBCDIC)
        if ( $byte[0] -eq 0xdd -and $byte[1] -eq 0x73 -and $byte[2] -eq 0x66 -and $byte[3] -eq 0x73 ) {
            return 'Unknown'
        }

        # 0E FE FF (SCSU)
        if ( $byte[0] -eq 0x0e -and $byte[1] -eq 0xfe -and $byte[2] -eq 0xff ) {
            return 'Unknown'
        }

        # FB EE 28 (BOCU-1)
        if ( $byte[0] -eq 0xfb -and $byte[1] -eq 0xee -and $byte[2] -eq 0x28 ) {
            return 'Unknown'
        }

        # 84 31 95 33 (GB-18030)
        if ( $byte[0] -eq 0x84 -and $byte[1] -eq 0x31 -and $byte[2] -eq 0x95 -and $byte[3] -eq 0x33 ) {
            return 'Unknown'
        }

        # No recognised BOM.
        return [System.Text.Encoding]::ASCII
    }
}

<#
.SYNOPSIS
Reports which line-ending style a file uses.
.DESCRIPTION
Reads the whole file as one string and counts carriage returns, line feeds and
CR immediately followed by LF. Returns:
  ""      - the content is empty
  "Cr"    - only carriage returns were found
  "Lf"    - only line feeds were found
  "CrLf"  - every CR and every LF belongs to a CR LF pair (this is also the
            result for non-empty content without any line break)
  "Mixed" - anything else
.PARAMETER path
File to inspect. It is resolved through Get-Path, a helper defined outside this file.
#>
function Get-FileNewLine {
    [CmdletBinding()]
    param(
        [Parameter(ValueFromPipeline = $true)]
        $path
    )

    process {
        $path = Get-Path -path $path
        $content = Get-Content $path -Raw
        if ( -not $content ) {
            return ""
        }

        # LastCr / LastLf start at -2 so that position 0 can never look like it
        # directly follows a previous CR.
        $newLineStats = [PSCustomObject]@{
            CrLf   = 0
            Cr     = 0
            Lf     = 0
            LastCr = -2
            LastLf = -2
        }

        $i = -1
        foreach ( $ch in $content.GetEnumerator() ) {
            $i++
            if ( $ch -eq "`r" ) {
                $newLineStats.LastCr = $i
                $newLineStats.Cr ++
            }
            if ( $ch -eq "`n" ) {
                $newLineStats.LastLf = $i
                $newLineStats.Lf ++
                # An LF directly after a CR forms one CR LF pair.
                if ( $newLineStats.LastCr -eq ( $i - 1 ) ) {
                    $newLineStats.CrLf ++
                }
            }
        }

        if ( ( $newLineStats.Cr ) -and ( -not $newLineStats.Lf ) ) {
            return "Cr"
        }
        if ( ( $newLineStats.Lf ) -and ( -not $newLineStats.Cr ) ) {
            return "Lf"
        }
        if ( ( $newLineStats.CrLf -eq $newLineStats.Cr ) -and ( $newLineStats.CrLf -eq $newLineStats.Lf ) ) {
            return "CrLf"
        }
        return "Mixed"
    }
}

<#
.SYNOPSIS
Rewrites a file line by line, keeping its detected encoding.
.DESCRIPTION
The file is renamed to '<name>.bak' (an existing backup is removed first) and
its lines are piped from the backup into Set-Content at the original path, so
the line endings become whatever Set-Content writes on the current platform.
When the rewritten file has the same hash as the backup, the backup is
deleted; otherwise it is kept and a verbose message is written.

Files whose encoding Get-FileEncoding cannot express as an encoding object
(it returned 'Unknown') are skipped with an error before anything is renamed.
.PARAMETER path
File to convert. It is resolved through Get-Path, a helper defined outside this file.
.PARAMETER Delimeter
Kept for interface compatibility; the body does not use this value.
.PARAMETER RemoveEmptyLines
When set, lines that are null or empty are dropped while rewriting.
#>
function ConvertTo-CrLf {
    param (
        [parameter(ValueFromPipeline = $true)]
        $path,
        $Delimeter = "`r`n",
        [Switch]$RemoveEmptyLines
    )

    process {
        $path = Get-Path $path
        $name = Split-Path $path -Leaf
        $folder = Split-Path $path -Parent
        $bak = "$folder\$name.bak"

        $encoding = Get-FileEncoding -FullName $path
        if ( $encoding -isnot [System.Text.Encoding] ) {
            # Bail out BEFORE renaming: Set-Content cannot take 'Unknown' as an
            # encoding, and failing after the rename would leave only the .bak file.
            Write-Error -Message "File:'$path' has an unknown encoding and was not converted"
            return
        }

        if ( Test-Path $bak ) {
            Remove-Item $bak -Force
        }
        Rename-Item -Path $path -NewName "$name.bak" -Force

        if ( $RemoveEmptyLines ) {
            Get-Content -Path $bak |
                Where-Object { -not [String]::IsNullOrEmpty($_) } |
                Set-Content -Path $path -Encoding $encoding
        }
        else {
            Get-Content -Path $bak |
                Set-Content -Path $path -Encoding $encoding
        }

        # Identical hashes mean the rewrite changed nothing, so the backup is redundant.
        $bakHash = Get-FileHash -Path $bak
        $newHash = Get-FileHash -Path $path
        if ( $bakHash.Hash -eq $newHash.Hash ) {
            Remove-Item $bak -Force
        }
        else {
            Write-Verbose -Message "File:'$path' Crlf changed"
        }
    }
}

# Usage example for ConvertTo-CrLf; the paths are machine specific. 'assert' is
# a helper defined outside this file.
function ConvertTo-CrLf:::Example {
    ConvertTo-CrLf -path 'C:\temp\PROD-2017-02\Export\Expand\TSTTBC\CTSTRATSRP01ONI\wss\VirtualDirectories\{{CPAlias}}.MYTMBC.C360.PRIVATE443\web.config' -RemoveEmptyLines

    $folder = 'C:\temp\Export'
    # Use one filter for both the conversion and the verification below.
    $include = '*.config', '*.txt'
    Get-Unix_Files_LineEndings $folder -include $include | ConvertTo-CrLf

    $unixFilesAfter = Get-ChildItem -Path $folder -Include $include -Recurse |
        Where-Object { ( Get-FileNewLine $_ ) -notin @("CrLf","") }
    $unixFilesAfter.Count | assert -eq 0
}

<#
.SYNOPSIS
Lists files below a folder whose line endings are not CR LF.
.DESCRIPTION
Recursively enumerates the files matching -include and returns those for which
Get-FileNewLine reports something other than "CrLf". Empty files (result "")
have no line endings to convert and are not returned.
.PARAMETER folder
Root folder to search. Accepts pipeline input.
.PARAMETER include
Name filters passed to Get-ChildItem -Include. When omitted, the value returned
by Get-Text_Filter (a helper defined outside this file) is used.
#>
function Get-Unix_Files_LineEndings {
    param(
        [Parameter(ValueFromPipeline = $true)]
        $folder,
        $include
    )

    begin {
        if ( -not $include ) {
            $include = Get-Text_Filter
        }
    }

    process {
        $unixFiles = Get-ChildItem -Path $folder -Include $include -Recurse -File |
            Where-Object { ( Get-FileNewLine $_ ) -notin @("CrLf", "") }
        $unixFiles
    }
}

<#
.SYNOPSIS
Runs the unix2dos.exe that ships with Git on a file.
.DESCRIPTION
Locates git through Get-Command, goes two directory levels up from the git
executable and expects 'usr\bin\unix2dos.exe' below that folder. Each piped
path is resolved with Get-Path, passed through Update-String_Enclose '"'
-conditional and handed to Invoke-Exe together with the unix2dos path. These
three helpers are defined outside this file.
.PARAMETER path
File to convert. Accepts pipeline input.
#>
function ConvertFrom-Unix_to_Win {
    param (
        [Parameter(ValueFromPipeline = $true)]
        $path
    )

    begin {
        $gitSrc = ( Get-Command git ).Source
        $gitFolder = Split-Path ( Split-Path $gitSrc -Parent ) -Parent
        $unix2dos = "$gitFolder\usr\bin\unix2dos.exe"
    }

    process {
        $path = Get-Path $path
        $path = $path | Update-String_Enclose '"' -conditional
        Invoke-Exe $unix2dos $path
    }
}