Sanitization.psm1
# Base type for redaction rules: a regex pattern plus an Evaluate method that yields the replacement text.
class RedactionRule {
    # Regular expression selecting the text to redact.
    [ValidateNotNullOrEmpty()][string]$Pattern

    # Derived classes override this to produce the replacement for a given seed; the base version always throws.
    [string] Evaluate([int]$Seed) {
        throw "Cannot call Evaluate from base class"
    }
}

# Rule whose replacement is produced by invoking a script block with the seed as its argument.
class RedactionRuleFunction : RedactionRule {
    [ValidateNotNullOrEmpty()][scriptblock]$NewValue

    RedactionRuleFunction([string]$Pattern, [scriptblock]$NewValue) {
        $this.Pattern = $Pattern
        $this.NewValue = $NewValue
    }

    # Invokes the script block with the seed and returns its result as a string.
    [string] Evaluate([int]$Seed) {
        return (& $this.NewValue $Seed)
    }
}

# Rule whose replacement is a format string; the seed is substituted for {0}.
class RedactionRuleString : RedactionRule {
    [ValidateNotNullOrEmpty()][string]$NewValue

    RedactionRuleString([string]$Pattern, [string]$NewValue) {
        $this.Pattern = $Pattern
        $this.NewValue = $NewValue
    }

    # Applies the -f format operator to the stored string with the seed as the only argument.
    [string] Evaluate([int]$Seed) {
        return ($this.NewValue -f $Seed)
    }
}

# Converts an integer seed (first positional argument, $args[0]) into a dotted-quad string.
# The seed is decomposed into base-254 digits: the lowest digit becomes the last octet (+1),
# the highest becomes the first octet (+11), so distinct non-negative seeds give distinct results.
function Convert-IPValue {
    [int]$Remaining = $args[0]

    $Octet4 = ($Remaining % 254) + 1
    # '/' yields a double, and assigning a double to an [int] variable rounds to nearest
    # (not down), which made different seeds collide. Floor explicitly before assigning.
    $Remaining = [math]::Floor([double]$Remaining / 254)

    $Octet3 = $Remaining % 254
    $Remaining = [math]::Floor([double]$Remaining / 254)

    $Octet2 = $Remaining % 254
    $Remaining = [math]::Floor([double]$Remaining / 254)

    $Octet1 = $Remaining % 254 + 11

    "$Octet1.$Octet2.$Octet3.$Octet4"
}

<#
.SYNOPSIS
Redact sensitive information from a file

.DESCRIPTION
Redact sensitive information from a file as an array of strings or one long string by defined redaction rules

.PARAMETER RedactionRule
Array of rules to redact by

.PARAMETER Path
Specifies a path to one or more locations. Wildcards are permitted.

.PARAMETER LiteralPath
Specifies a path to one or more locations. Unlike the Path parameter, the value of the LiteralPath parameter is
used exactly as it is typed. No characters are interpreted as wildcards. If the path includes escape characters,
enclose it in single quotation marks. Single quotation marks tell Windows PowerShell not to interpret any
characters as escape sequences.

.PARAMETER ReadRaw
Ignores newline characters and passes the entire contents of a file in one string with the newlines preserved.
By default, newline characters in a file are used as delimiters to separate the input into an array of strings.
Process the file as one string instead of processing the strings line by line.

.EXAMPLE
$WULog = "$env:USERPROFILE\Desktop\WULog.log"
Get-WindowsUpdateLog -LogPath $WULog
Invoke-FileRedaction -Path $WULog -ReadRaw -RedactionRule @(
    New-RedactionRule '(?<=\d{4}\/\d{2}\/\d{2} \d{2}\:\d{2}\:\d{2}\.\d{7} \d{1,5} \d{1,5}\s+)\w+(?=\s+)' 'Component_{0}'
)

.NOTES
Invoke-FileRedaction creates 2 files in the same location of the input file, the redacted file with "-Sanitized.txt" suffix
and the conversion table csv file with "-ConversionTable.csv" suffix.
By default all strings in the files are processed with Invoke-Redaction with the -Consistent parameter.
#>
function Invoke-FileRedaction {
    [CmdletBinding()]
    param (
        [Parameter(Mandatory = $true, Position = 0)]
        [RedactionRule[]]$RedactionRule,

        # Specifies a path to one or more locations. Wildcards are permitted.
        [Parameter(Mandatory=$true, Position=1, ParameterSetName="Path", ValueFromPipeline=$true, ValueFromPipelineByPropertyName=$true, HelpMessage="Path to one or more locations.")]
        [ValidateNotNullOrEmpty()]
        [SupportsWildcards()]
        [string[]]
        $Path,

        # Specifies a path to one or more locations. Unlike the Path parameter, the value of the LiteralPath parameter is
        # used exactly as it is typed. No characters are interpreted as wildcards. If the path includes escape characters,
        # enclose it in single quotation marks. Single quotation marks tell Windows PowerShell not to interpret any
        # characters as escape sequences.
        [Parameter(Mandatory=$true, Position=1, ParameterSetName="LiteralPath", ValueFromPipelineByPropertyName=$true, HelpMessage="Literal path to one or more locations.")]
        [Alias("PSPath")]
        [ValidateNotNullOrEmpty()]
        [string[]]
        $LiteralPath,

        [switch]$ReadRaw
    )

    begin {
        # -NoTypeInformation is only passed to Export-Csv on PowerShell major version 5 or lower
        $ExportCSVProperties = @{}
        if ($PSVersionTable.PSVersion.Major -le 5) {
            $ExportCSVProperties['NoTypeInformation'] = $true
        }
    }

    process {
        $paths = @()
        if ($psCmdlet.ParameterSetName -eq 'Path') {
            foreach ($aPath in $Path) {
                if (!(Test-Path -Path $aPath)) {
                    # Non-terminating error: report the missing path and keep processing the others
                    $ex = New-Object System.Management.Automation.ItemNotFoundException "Cannot find path '$aPath' because it does not exist."
                    $category = [System.Management.Automation.ErrorCategory]::ObjectNotFound
                    $errRecord = New-Object System.Management.Automation.ErrorRecord $ex,'PathNotFound',$category,$aPath
                    $psCmdlet.WriteError($errRecord)
                    continue
                }

                # Resolve any wildcards that might be in the path
                $provider = $null
                $paths += $psCmdlet.SessionState.Path.GetResolvedProviderPathFromPSPath($aPath, [ref]$provider)
            }
        }
        else {
            foreach ($aPath in $LiteralPath) {
                if (!(Test-Path -LiteralPath $aPath)) {
                    # Non-terminating error: report the missing path and keep processing the others
                    $ex = New-Object System.Management.Automation.ItemNotFoundException "Cannot find path '$aPath' because it does not exist."
                    $category = [System.Management.Automation.ErrorCategory]::ObjectNotFound
                    $errRecord = New-Object System.Management.Automation.ErrorRecord $ex,'PathNotFound',$category,$aPath
                    $psCmdlet.WriteError($errRecord)
                    continue
                }

                # Resolve any relative paths
                $paths += $psCmdlet.SessionState.Path.GetUnresolvedProviderPathFromPSPath($aPath)
            }
        }

        foreach ($aPath in $paths) {
            # Output will be on the same directory
            $SanitizedFilePath = $aPath + "-Sanitized.txt"
            'Sanitized File: {0}' -f $SanitizedFilePath | Write-Verbose

            $ConversionTableFilePath = $aPath + "-ConversionTable.csv"
            'Conversion Table File: {0}' -f $ConversionTableFilePath | Write-Verbose

            # $aPath is already resolved, so it is always passed as a literal path from here on;
            # otherwise characters such as '[' and ']' in a file name would be treated as wildcards.
            $TotalLines = Get-Content -LiteralPath $aPath | Measure-Object -Line | Select-Object -ExpandProperty Lines
            'Total No.Lines: {0}' -f $TotalLines | Write-Verbose

            # -TotalLines must be at least 1; with -ReadRaw a single string is piped, so one item is expected
            if ($ReadRaw -or ($TotalLines -eq 0)) {
                $TotalLines = 1
            }

            Write-Progress -Activity "Redacting sensitive data from file: `"$aPath`"" -Id 1

            # Invoke-Redaction is asked to publish its conversion table in a variable named 'ConversionTable'
            Get-Content -LiteralPath $aPath -Raw:$ReadRaw |
                Invoke-Redaction -RedactionRule $RedactionRule -Consistent -OutConversionTable 'ConversionTable' -TotalLines $TotalLines |
                Out-File -LiteralPath $SanitizedFilePath

            # One CSV row per original value, sorted by the value that replaced it
            $ConversionTable.Keys |
                Select-Object -Property @{N = 'NewValue'; E = {$ConversionTable[$_]}}, @{N = 'Original'; E = {$_}} |
                Sort-Object -Property NewValue |
                Export-Csv -LiteralPath $ConversionTableFilePath @ExportCSVProperties

            [PSCustomObject]@{
                Original        = $aPath
                Sanitized       = $SanitizedFilePath
                ConversionTable = $ConversionTableFilePath
            }
        }
    }

    end {
        Write-Progress -Activity "[Done] Redacting sensitive data from file: `"$aPath`" [Done]" -Id 1 -Completed
    }
}

<#
.SYNOPSIS
Redact sensitive information from an object

.DESCRIPTION
Redact sensitive information from an object as string by defined redaction rules

.PARAMETER RedactionRule
Array of redaction rules to redact by

.PARAMETER InputObject
String to redact sensitive information from

.PARAMETER Consistent
Saves discovered values in a ConversionTable (hash table), when the same values are discovered again they are replaced with the same string
that was generated the first time from the redaction rule NewValue function or NewValue formatted string.
It uses a uniqueness value to generate new value from the redaction rule (if applicable).
If Consistent is omitted, generation of new value from redaction rule's NewValues is based on current line number.

.PARAMETER OutConversionTable
Creates a variable with the specified name and the ConversionTable as its value.

.PARAMETER AsObject
Return an object with the old string, the processed string, line number and if the string was changed or not
instead of just a processed string.

.PARAMETER TotalLines
Number of lines that are going to be processed over the pipeline.
Relevant for showing informative progress bar.

.EXAMPLE
Replace all a-z letters with '+' sign
$RedactionRule = New-RedactionRule -Pattern '[a-z]' -NewValueString '+'
ipconfig /all | Invoke-Redaction -RedactionRule $RedactionRule

.EXAMPLE
Replace all service names that start with the letter 's' with 's_{0}', where {0} is replaced by uniqueness factor.
Each unique service name will be replaced with a unique new value 's_{0}' and it will stay consistent if the service shows up multiple times.
$RedactionRule = New-RedactionRule -Pattern '(?<=\s)[Ss].+' -NewValueString 's_{0}'
Get-Process | Out-String | Invoke-Redaction -RedactionRule $RedactionRule -Consistent

.NOTES
#>
function Invoke-Redaction {
    [Alias('Invoke-Sanitization', 'irdac', 'isntz')]
    [CmdletBinding()]
    param(
        [Parameter(Mandatory = $true, Position = 0)]
        [RedactionRule[]]$RedactionRule,

        # One line string
        [Parameter(Mandatory = $true, ValueFromPipeline = $true, Position = 1)]
        [AllowEmptyString()] # Incoming lines can be empty, so applied because of the Mandatory flag
        [psobject]
        $InputObject,

        # Requires $ConversionTable but if it won't be provided, empty hash table for $ConversionTable will be initialized instead
        [switch]
        $Consistent,

        [switch]
        $AsObject,

        [ValidateRange(1, [int]::MaxValue)]
        [int]
        $TotalLines = 1
    )

    DynamicParam {
        # -OutConversionTable (a non-empty string) is only offered when -Consistent is specified
        if ($Consistent) {
            $ParameterName = 'OutConversionTable'
            $RuntimeParameterDictionary = New-Object System.Management.Automation.RuntimeDefinedParameterDictionary
            $AttributeCollection = New-Object System.Collections.ObjectModel.Collection[System.Attribute]

            $ValidateNotNullOrEmptyAttribute = New-Object System.Management.Automation.ValidateNotNullOrEmptyAttribute
            $AttributeCollection.Add($ValidateNotNullOrEmptyAttribute)

            $ParameterAttribute = New-Object System.Management.Automation.ParameterAttribute
            $AttributeCollection.Add($ParameterAttribute)

            $RuntimeParameter = New-Object System.Management.Automation.RuntimeDefinedParameter($ParameterName, [string], $AttributeCollection)
            $RuntimeParameterDictionary.Add($ParameterName, $RuntimeParameter)
            return $RuntimeParameterDictionary
        }
    }

    Begin {
        if ($Consistent) {
            # Read the dynamic parameter by its literal name rather than through a local set in DynamicParam
            $OutConversionTable = $PSBoundParameters['OutConversionTable']
            $ConversionTable = @{}
            $Uniqueness = 0
        }

        #region Write-Progress calculation block initialization
        $PercentComplete = 0
        $PercentStep = 100 / $TotalLines
        [double]$AverageTime = 0
        [int]$SecondsRemaining = $AverageTime * $TotalLines
        $StopWatch = [System.Diagnostics.Stopwatch]::new()
        $StopWatch.Start()
        #endregion

        $LineNumber = 0
    }

    Process {
        $CurrentString = $InputObject.ToString()
        $CurrentStringChanged = $false

        foreach ($Rule in $RedactionRule) {
            # Stored in a dedicated local so the automatic variable $Matches is not overwritten.
            # Sort Descending is required so the replacements won't overwrite each other
            $RuleMatches = Select-String -InputObject $CurrentString -Pattern $Rule.Pattern -AllMatches |
                Select-Object -ExpandProperty Matches |
                Sort-Object -Property Index -Descending

            if ($RuleMatches) {
                $CurrentStringChanged = $true
                $StrSB = New-Object System.Text.StringBuilder($CurrentString)

                Foreach ($Match in $RuleMatches) {
                    $MatchedValue = $Match.Value
                    'MatchedValue = {0}' -f $MatchedValue | Write-Verbose

                    if ($Consistent) {
                        if ($null -eq $ConversionTable[$MatchedValue]) {
                            # MatchedValue doesn't exist in the ConversionTable
                            # Adding MatchedValue to the ConversionTable, seeded with the uniqueness counter (if {0} is specified in $NewValue)
                            $ConversionTable[$MatchedValue] = $Rule.Evaluate($Uniqueness)
                            'Adding new value to the conversion table: $ConvetionTable[{0}] = {1}' -f $MatchedValue, $ConversionTable[$MatchedValue] | Write-Verbose
                            $Uniqueness++
                        }

                        # This MatchedValue exists, use it.
                        $Replacement = $ConversionTable[$MatchedValue]
                    }
                    else {
                        $Replacement = $Rule.Evaluate($LineNumber)
                    }

                    $null = $StrSB.Remove($Match.Index, $Match.Length)
                    $null = $StrSB.Insert($Match.Index, $Replacement)
                }

                # The next rule runs against the output of this one
                $CurrentString = $StrSB.ToString()
            }
        } # foreach ($Rule in $RedactionRule)

        if ($AsObject) {
            $OutputProperties = @{
                LineNumber    = $LineNumber
                CurrentString = $CurrentString
                Original      = $InputObject
                Changed       = $CurrentStringChanged
            }
            $OutputPropertiesList = 'LineNumber', 'CurrentString', 'Original', 'Changed'

            if ($Consistent) {
                $OutputProperties['Uniqueness'] = $Uniqueness
                $OutputPropertiesList += 'Uniqueness'
            }

            # Select-Object fixes the property order of the emitted object
            New-Object -TypeName PSCustomObject -Property $OutputProperties | Select-Object $OutputPropertiesList
        }
        else {
            $CurrentString
        }

        #region Write-Progress calculation block
        if ($TotalLines -gt $LineNumber) {
            $PercentComplete += $PercentStep
            $ElapsedSeconds = $StopWatch.Elapsed.TotalSeconds
            $StopWatch.Restart()
            # Running average of the time spent per processed item
            [double]$AverageTime = ($AverageTime * $LineNumber + $ElapsedSeconds) / ($LineNumber + 1)
            [int]$SecondsRemaining = $AverageTime * ($TotalLines - $LineNumber)
            # One placeholder per argument; previously "Remain(S)" displayed the elapsed time
            'L = {0} | Avg = {1} | Elapsed(S) = {2} | Remain(S) = {3}' -f $LineNumber, $AverageTime, $ElapsedSeconds, $SecondsRemaining | Write-Debug
        }

        Write-Progress -Activity "Redacting sensitive data. Line Number: $LineNumber out of $TotalLines" -Id 2 -ParentId 1 -PercentComplete $PercentComplete -SecondsRemaining $SecondsRemaining
        #endregion

        $LineNumber++
    } # Process

    end {
        #region Write-Progress calculation block closing
        $StopWatch.Stop()
        Write-Progress -Activity "[Done] Redacting sensitive data [Done]" -Id 2 -ParentId 1 -Completed
        #endregion

        if (-not [string]::IsNullOrWhiteSpace($OutConversionTable)) {
            '$PSCmdlet.MyInvocation.CommandOrigin: {0}' -f $PSCmdlet.MyInvocation.CommandOrigin | Write-Debug

            if ($PSCmdlet.MyInvocation.CommandOrigin -eq 'Runspace') {
                $PSCmdlet.SessionState.PSVariable.Set($OutConversionTable, $ConversionTable)
            }
            else {
                # CommandOrigin: Internal
                # NOTE(review): the variable is created two scopes above this one - confirm that is the intended caller scope
                Set-Variable -Name $OutConversionTable -Value $ConversionTable -Scope 2
            }
        }
    }
}

<#
.SYNOPSIS
Creates new redaction rule.

.DESCRIPTION
Creates new redaction rule with regex pattern to look for and NewValue to replace with.

.PARAMETER Pattern
Regex pattern

.PARAMETER NewValueFunction
Script block to generate new generic data, the result is then put instead of the original value.
This script block can accept at most 1 int parameter with $args[0] or declare variable in param() block

.PARAMETER NewValueString
String value to be replaced instead of pattern.
The string can contain place holder {0}, and it will be replaced with uniqueness factor.

.PARAMETER CommonRule
Predefined rules - patterns and values

.EXAMPLE
New-RedactionRule '(?<=\().*(?=\))' 'Process_{0}'

.EXAMPLE
Mark '[a-z]' { [long]$p = $args[0]; [char]($p % 26 + 65) }

.EXAMPLE
Mark -CommonRule IPV4Address

.NOTES
#>
Function New-RedactionRule {
    [Diagnostics.CodeAnalysis.SuppressMessageAttribute("PSUseShouldProcessForStateChangingFunctions", "")]
    [Alias('New-SanitizationRule','New-MarkingRule','Mark')] # Usually Single word is an automatic alias for Get-<SingleWord>
    [OutputType([RedactionRule])]
    [CmdletBinding(DefaultParameterSetName = 'CustomFunction')]
    param(
        # Regex pattern with 1 named capturing group at most
        [Parameter(Mandatory = $true, Position = 0, ParameterSetName = 'CustomString')]
        [Parameter(Mandatory = $true, Position = 0, ParameterSetName = 'CustomFunction')]
        [string]$Pattern,

        # Script block that generates the new value
        [Parameter(Mandatory = $true, Position = 1, ParameterSetName = 'CustomFunction')]
        [scriptblock]$NewValueFunction,

        # Value can contain {0} so counter value will be added
        [Parameter(Mandatory = $true, Position = 1, ParameterSetName = 'CustomString')]
        [String]$NewValueString,

        # Name of a predefined rule stored in $Script:CommonRuleTable
        [Parameter(Mandatory = $true, Position = 0, ParameterSetName = 'Common')]
        [ValidateSet('IPV4Address')]
        [string]$CommonRule
    )

    # The resolved parameter set decides which kind of rule is emitted
    switch ($PSCmdlet.ParameterSetName) {
        'Common'         { $Script:CommonRuleTable[$CommonRule] }
        'CustomFunction' { New-Object RedactionRuleFunction($Pattern, $NewValueFunction) }
        'CustomString'   { New-Object RedactionRuleString($Pattern, $NewValueString) }
    }
}

# Predefined rules returned by New-RedactionRule -CommonRule <name>
$Script:CommonRuleTable = @{
    'IPV4Address' = New-RedactionRule -Pattern '\b(\d{1,3}(\.\d{1,3}){3})\b' -NewValueFunction ${Function:Convert-IPValue}
    #'IPV6Address' = New-RedactionRule -Pattern '\b(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))\b' -NewValueFunction ${Function:Generate-IPValue}
    #'MACAddress' = New-RedactionRule -Pattern '\b([0-9A-F]{2}[:-]){5}([0-9A-F]{2})\b' -NewValueFunction ${Function:Generate-IPValue}
    #'GUID' = New-RedactionRule -Pattern '\b[{(]?[0-9A-F]{8}[-]?(?:[0-9A-F]{4}[-]?){3}[0-9A-F]{12}[)}]?\b' -NewValueFunction ${Function:Generate-IPValue}
}

# No trailing pipe here: a pipeline that ends in '|' is a parse error
Export-ModuleMember -Function * -Alias * -Cmdlet *