Sanitization.psm1

# Base type shared by all redaction rules: holds the regex pattern and declares
# the Evaluate contract that concrete rule types override.
class RedactionRule {
    # Regular-expression pattern locating the text to redact; may not be null or empty.
    [ValidateNotNullOrEmpty()]
    [string]$Pattern

    # Produces the replacement text for the given seed.
    # The base implementation always throws; derived classes supply the real logic.
    [string] Evaluate([int]$Seed) {
        throw "Cannot call Evaluate from base class"
    }
}

# Redaction rule whose replacement text is produced by invoking a script block.
class RedactionRuleFunction : RedactionRule {
    # Script block called with the seed as its single argument; may not be null.
    [ValidateNotNullOrEmpty()]
    [scriptblock]$NewValue

    # Stores the regex pattern and the generator script block.
    RedactionRuleFunction ([string]$Pattern, [scriptblock]$NewValue) {
        $this.Pattern = $Pattern
        $this.NewValue = $NewValue
    }

    # Invokes the generator with $Seed and returns its output converted to a string.
    [string] Evaluate([int]$Seed) {
        $generated = & $this.NewValue $Seed
        return $generated
    }
}

# Redaction rule whose replacement text is a format string expanded with the seed.
class RedactionRuleString : RedactionRule {
    # Format string used as the replacement; a {0} placeholder receives the seed.
    [ValidateNotNullOrEmpty()]
    [string]$NewValue

    # Stores the regex pattern and the replacement format string.
    RedactionRuleString ([string]$Pattern, [string]$NewValue) {
        $this.Pattern = $Pattern
        $this.NewValue = $NewValue
    }

    # Applies the -f format operator to NewValue with $Seed as the only argument.
    [string] Evaluate([int]$Seed) {
        $formatted = $this.NewValue -f $Seed
        return $formatted
    }
}
function Convert-IPValue {
    # Maps an integer seed (first positional argument) to a dotted-quad string.
    # The seed is decomposed into base-254 digits, least significant digit last:
    #   octet 4 = digit0 + 1   (range 1..254)
    #   octet 3 = digit1       (range 0..253)
    #   octet 2 = digit2       (range 0..253)
    #   octet 1 = digit3 + 11
    # Distinct non-negative seeds therefore yield distinct addresses.
    [int]$remaining = $args[0]

    $digits = New-Object 'int[]' 4
    for ($i = 3; $i -ge 0; $i--) {
        $digit = $remaining % 254
        $digits[$i] = $digit
        # Exact integer division. Assigning a fractional quotient to an [int]
        # variable rounds to nearest instead of truncating, which made different
        # seeds produce the same address.
        $remaining = ($remaining - $digit) / 254
    }

    $digits[3] += 1
    $digits[0] += 11

    $digits -join '.'
}
<#
.SYNOPSIS
Redact sensitive information from a file
 
.DESCRIPTION
Redact sensitive information from a file as an array of strings or one long string by defined redaction rules
 
.PARAMETER RedactionRule
Array of rules to redact by
 
.PARAMETER Path
Specifies a path to one or more locations. Wildcards are permitted.
 
.PARAMETER LiteralPath
Specifies a path to one or more locations. Unlike the Path parameter, the value of the LiteralPath parameter is
used exactly as it is typed. No characters are interpreted as wildcards. If the path includes escape characters,
enclose it in single quotation marks. Single quotation marks tell Windows PowerShell not to interpret any
characters as escape sequences.
 
.PARAMETER ReadRaw
Ignores newline characters as delimiters and passes the entire contents of a file as one string with the newlines preserved.
By default, newline characters in a file are used as delimiters to separate the input into an array of strings.
When specified, the file is processed as one string instead of line by line.
 
.EXAMPLE
$WULog = "$env:USERPROFILE\Desktop\WULog.log"
Get-WindowsUpdateLog -LogPath $WULog
Invoke-FileRedaction -Path $WULog -ReadRaw -RedactionRule @(
    New-RedactionRule '(?<=\d{4}\/\d{2}\/\d{2} \d{2}\:\d{2}\:\d{2}\.\d{7} \d{1,5} \d{1,5}\s+)\w+(?=\s+)' 'Component_{0}'
)
 
.NOTES
Invoke-FileRedaction creates 2 files in the same location as the input file:
the redacted file with the "-Sanitized.txt" suffix
and the conversion table CSV file with the "-ConversionTable.csv" suffix.
By default all strings in the files are processed by Invoke-Redaction with the -Consistent parameter.
#>

function Invoke-FileRedaction {
    # For every resolved input file: pipes its content through Invoke-Redaction
    # (with -Consistent), writes the result to "<file>-Sanitized.txt", exports the
    # conversion table to "<file>-ConversionTable.csv", and emits an object with
    # the Original, Sanitized and ConversionTable paths.
    [CmdletBinding()]
    param (
        [Parameter(Mandatory = $true, 
            Position = 0)]
        [RedactionRule[]]$RedactionRule,
        # Specifies a path to one or more locations. Wildcards are permitted.
        [Parameter(Mandatory=$true,
                   Position=1,
                   ParameterSetName="Path",
                   ValueFromPipeline=$true,
                   ValueFromPipelineByPropertyName=$true,
                   HelpMessage="Path to one or more locations.")]
        [ValidateNotNullOrEmpty()]
        [SupportsWildcards()]
        [string[]]
        $Path,
        # Specifies a path to one or more locations. Unlike the Path parameter, the value of the LiteralPath parameter is
        # used exactly as it is typed. No characters are interpreted as wildcards. If the path includes escape characters,
        # enclose it in single quotation marks. Single quotation marks tell Windows PowerShell not to interpret any
        # characters as escape sequences.
        [Parameter(Mandatory=$true,
                   Position=1,
                   ParameterSetName="LiteralPath",
                   ValueFromPipelineByPropertyName=$true,
                   HelpMessage="Literal path to one or more locations.")]
        [Alias("PSPath")]
        [ValidateNotNullOrEmpty()]
        [string[]]
        $LiteralPath,
        [switch]$ReadRaw
    )

    begin {
        # Extra Export-Csv arguments: -NoTypeInformation is passed on PowerShell 5 and earlier.
        $ExportCSVProperties = @{}
        if($PSVersionTable.PSVersion.Major -le 5){
            $ExportCSVProperties['NoTypeInformation'] = $true
        } 
    }

    process {
        # Resolve the bound parameter set into a flat list of provider paths.
        $paths = @()
        if ($psCmdlet.ParameterSetName -eq 'Path') {
            foreach ($aPath in $Path) {
                if (!(Test-Path -Path $aPath)) {
                    # Non-terminating error: report the missing path and keep going with the rest.
                    $ex = New-Object System.Management.Automation.ItemNotFoundException "Cannot find path '$aPath' because it does not exist."
                    $category = [System.Management.Automation.ErrorCategory]::ObjectNotFound
                    $errRecord = New-Object System.Management.Automation.ErrorRecord $ex,'PathNotFound',$category,$aPath
                    $psCmdlet.WriteError($errRecord)
                    continue
                }
            
                # Resolve any wildcards that might be in the path
                $provider = $null
                $paths += $psCmdlet.SessionState.Path.GetResolvedProviderPathFromPSPath($aPath, [ref]$provider)
            }
        }
        else {
            foreach ($aPath in $LiteralPath) {
                if (!(Test-Path -LiteralPath $aPath)) {
                    $ex = New-Object System.Management.Automation.ItemNotFoundException "Cannot find path '$aPath' because it does not exist."
                    $category = [System.Management.Automation.ErrorCategory]::ObjectNotFound
                    $errRecord = New-Object System.Management.Automation.ErrorRecord $ex,'PathNotFound',$category,$aPath
                    $psCmdlet.WriteError($errRecord)
                    continue
                }
            
                # Resolve any relative paths
                $paths += $psCmdlet.SessionState.Path.GetUnresolvedProviderPathFromPSPath($aPath)
            }
        }
        
        foreach ($aPath in $paths) {
            # Output will be on the same directory
            $SanitizedFilePath = $aPath + "-Sanitized.txt"
            'Sanitized File: {0}' -f $SanitizedFilePath | Write-Verbose
            $ConversionTableFilePath = $aPath + "-ConversionTable.csv"
            'Conversion Table File: {0}' -f $ConversionTableFilePath | Write-Verbose 
            
            # The paths are already resolved, so they are always passed as -LiteralPath from
            # here on; otherwise characters such as '[' and ']' in a file name would be
            # interpreted as wildcards a second time.
            $TotalLines = Get-Content -LiteralPath $aPath | Measure-Object -Line | Select-Object -ExpandProperty Lines
            'Total No.Lines: {0}' -f $TotalLines | Write-Verbose
            # -TotalLines must be at least 1. With -ReadRaw the whole file travels down the
            # pipeline as a single object, so exactly one item will be processed.
            if ($ReadRaw -or $TotalLines -eq 0) {
                $TotalLines = 1
            }
            
            Write-Progress -Activity "Redacting sensitive data from file: `"$aPath`"" -Id 1
            
            # Drop any table left over from a previous file so that a failure while
            # redacting this file cannot export stale mappings.
            $ConversionTable = @{}

            # Invoke-Redaction publishes its conversion table under the variable name given
            # to -OutConversionTable; it is read back below as $ConversionTable.
            Get-Content -LiteralPath $aPath -Raw:$ReadRaw | Invoke-Redaction -RedactionRule $RedactionRule -Consistent -OutConversionTable 'ConversionTable' -TotalLines $TotalLines | Out-File -LiteralPath $SanitizedFilePath
            $ConversionTable.Keys | Select-Object -Property @{N = 'NewValue'; E = {$ConversionTable[$_]}}, @{N = 'Original'; E = {$_}} | Sort-Object -Property NewValue | Export-Csv -LiteralPath $ConversionTableFilePath @ExportCSVProperties

            [PSCustomObject]@{
                Original        = $aPath
                Sanitized       = $SanitizedFilePath
                ConversionTable = $ConversionTableFilePath            
            }       
        }
    }
    
    end {
        # Close the parent progress record (Id 1) opened in the process block.
        Write-Progress -Activity "[Done] Redacting sensitive data from file: `"$aPath`" [Done]" -Id 1 -Completed
    }
}
<#
.SYNOPSIS
Redact sensitive information from an object
 
.DESCRIPTION
Redact sensitive information from an object as string by defined redaction rules
 
.PARAMETER RedactionRule
Array of redaction rules to redact by
 
.PARAMETER InputObject
String to redact sensitive information from
 
.PARAMETER Consistent
Saves discovered values in a ConversionTable (hash table); when the same value is discovered again, it is replaced with the same string that was generated the first time from the redaction rule's NewValue function or NewValue format string.
It uses a uniqueness value to generate the new value from the redaction rule (if applicable).
If Consistent is omitted, generation of the new value from the redaction rule's NewValue is based on the current line number.
 
.PARAMETER OutConversionTable
Creates a variable with the specified name and the ConversionTable as its value.
 
.PARAMETER AsObject
Return an object with the old string, the processed string, line number and if the string was changed or not instead of just a processed string.
 
.PARAMETER TotalLines
Number of lines that are going to be processed over the pipeline.
Relevant for showing informative progress bar.
 
.EXAMPLE
Replace all a-z letters with '+' sign
$RedactionRule = New-RedactionRule -Pattern '[a-z]' -NewValueString '+'
ipconfig /all | Invoke-Redaction -RedactionRule $RedactionRule
 
.EXAMPLE
Replace all service names that start with the letter 's' with 's_{0}', where {0} is replaced by the uniqueness factor.
Each unique service name will be replaced with a unique new value 's_{0}' and it will stay consistent if the service shows up multiple times.
$RedactionRule = New-RedactionRule -Pattern '(?<=\s)[Ss].+' -NewValueString 's_{0}'
Get-Process | Out-String | Invoke-Redaction -RedactionRule $RedactionRule -Consistent
 
.NOTES
 
#>

function Invoke-Redaction {
    # Applies every redaction rule, in order, to each incoming object's string form and
    # emits either the redacted string or (with -AsObject) a summary object per input.
    # With -Consistent, the first replacement generated for a matched value is remembered
    # and reused, and the mapping can be published through -OutConversionTable.
    [Alias('Invoke-Sanitization', 'irdac', 'isntz')]
    [CmdletBinding()]
    param(
        [Parameter(Mandatory = $true, 
            Position = 0)]
        [RedactionRule[]]$RedactionRule,
        # One line string
        [Parameter(Mandatory = $true,  
            ValueFromPipeline = $true,
            Position = 1)]
        [AllowEmptyString()] # Incoming lines can be empty, so applied because of the Mandatory flag
        [psobject]
        $InputObject,
        # Enables the conversion table; an empty hash table is initialized in the Begin block
        [switch]
        $Consistent,
        [switch]
        $AsObject,
        [ValidateRange(1, [int]::MaxValue)]
        [int]
        $TotalLines = 1
    )

    DynamicParam {
        # -OutConversionTable only exists when -Consistent is specified.
        if ($Consistent) {
            $ParameterName = 'OutConversionTable'
            $RuntimeParameterDictionary = New-Object System.Management.Automation.RuntimeDefinedParameterDictionary
            $AttributeCollection = New-Object System.Collections.ObjectModel.Collection[System.Attribute]
            
            $ValidateNotNullOrEmptyAttribute = New-Object System.Management.Automation.ValidateNotNullOrEmptyAttribute
            $AttributeCollection.Add($ValidateNotNullOrEmptyAttribute)
            
            $ParameterAttribute = New-Object System.Management.Automation.ParameterAttribute
            $AttributeCollection.Add($ParameterAttribute)
            
            $RuntimeParameter = New-Object System.Management.Automation.RuntimeDefinedParameter($ParameterName, [string], $AttributeCollection)
            $RuntimeParameterDictionary.Add($ParameterName, $RuntimeParameter)
            
            return $RuntimeParameterDictionary
        }
    }

    Begin {
        # Always defined so the End block never reads an unset variable.
        $OutConversionTable = $null
        if ($Consistent) {
            # Look the dynamic parameter up by its literal name instead of relying on a
            # variable assigned inside the DynamicParam block.
            $OutConversionTable = $PSBoundParameters['OutConversionTable']
            $ConversionTable = @{}
            $Uniqueness = 0
        }

        #region Write-Progress calculation block initialization
        $PercentComplete = 0
        $PercentStep = 100 / $TotalLines
        [double]$AverageTime = 0
        [int]$SecondsRemaining = $AverageTime * $TotalLines
        $StopWatch = [System.Diagnostics.Stopwatch]::new()
        $StopWatch.Start()
        #endregion

        $LineNumber = 0
    }

    Process {
        $CurrentString = $InputObject.ToString()
        $CurrentStringChanged = $false

        foreach ($Rule in $RedactionRule) {
            # Named $RuleMatches so the automatic variable $Matches is not overwritten.
            # Sort Descending is required so the replacements won't overwrite each other:
            # editing from the end keeps the indexes of earlier matches valid.
            $RuleMatches = Select-String -InputObject $CurrentString -Pattern $Rule.Pattern -AllMatches | Select-Object -ExpandProperty Matches | Sort-Object -Property Index -Descending
            if ($RuleMatches) {
                $CurrentStringChanged = $true
                $StrSB = New-Object System.Text.StringBuilder($CurrentString)
                Foreach ($Match in $RuleMatches) {
                    $MatchedValue = $Match.Value

                    'MatchedValue = {0}' -f $MatchedValue | Write-Verbose

                    if ($Consistent) {
                        if ($null -eq $ConversionTable[$MatchedValue]) {
                            # MatchedValue doesn't exist in the ConversionTable yet:
                            # generate its replacement from the current uniqueness counter and store it.
                            $ConversionTable[$MatchedValue] = $Rule.Evaluate($Uniqueness)
                            'Adding new value to the conversion table: $ConvetionTable[{0}] = {1}' -f $MatchedValue, $ConversionTable[$MatchedValue] | Write-Verbose 
                            $Uniqueness++
                        }

                        # This MatchedValue exists, use it.
                        $Replacement = $ConversionTable[$MatchedValue]
                    }
                    else {
                        # Without -Consistent the seed is the current (zero-based) line number.
                        $Replacement = $Rule.Evaluate($LineNumber)
                    }

                    $null = $StrSB.Remove($Match.Index, $Match.Length)
                    $null = $StrSB.Insert($Match.Index, $Replacement)
                }

                # Later rules operate on the output of earlier rules.
                $CurrentString = $StrSB.ToString()
            }
        } # foreach($Rule in $RedactionRule)

        if ($AsObject) {
            $OutputProperties = @{
                LineNumber    = $LineNumber
                CurrentString = $CurrentString
                Original      = $InputObject
                Changed       = $CurrentStringChanged
            }

            $OutputPropertiesList = 'LineNumber', 'CurrentString', 'Original', 'Changed'

            if ($Consistent) {
                $OutputProperties['Uniqueness'] = $Uniqueness
                $OutputPropertiesList += 'Uniqueness'
            }

            # Select-Object fixes the property order of the emitted object.
            New-Object -TypeName PSCustomObject -Property $OutputProperties | Select-Object $OutputPropertiesList
        }
        else {
            $CurrentString
        }

        #region Write-Progress calculation block
        if ($TotalLines -gt $LineNumber) {
            $PercentComplete += $PercentStep
            $ElapsedSeconds = $StopWatch.Elapsed.TotalSeconds
            $StopWatch.Restart()
            # Running mean of the per-item processing time.
            [double]$AverageTime = ($AverageTime * $LineNumber + $ElapsedSeconds) / ($LineNumber + 1)
            # Item number $LineNumber (zero-based) has just been processed, so
            # $TotalLines - $LineNumber - 1 items are still outstanding.
            [int]$SecondsRemaining = $AverageTime * ($TotalLines - $LineNumber - 1)
            'L = {0} | Avg = {1} | Elapsed(S) = {2} | Remain(S) = {3}' -f $LineNumber, $AverageTime, $ElapsedSeconds, $SecondsRemaining | Write-Debug
        }

        Write-Progress -Activity "Redacting sensitive data. Line Number: $LineNumber out of $TotalLines" -Id 2 -ParentId 1 -PercentComplete $PercentComplete -SecondsRemaining $SecondsRemaining
        #endregion

        $LineNumber++
    } # Process

    end {
        #region Write-Progress calculation block closing
        $StopWatch.Stop()        
        Write-Progress -Activity "[Done] Redacting sensitive data [Done]" -Id 2 -ParentId 1 -Completed
        #endregion

        # Publish the conversion table under the caller-supplied variable name.
        if (-not [string]::IsNullOrWhiteSpace($OutConversionTable)) {
            '$PSCmdlet.MyInvocation.CommandOrigin: {0}' -f $PSCmdlet.MyInvocation.CommandOrigin | Write-Debug
            if ($PSCmdlet.MyInvocation.CommandOrigin -eq 'Runspace') {
                $PSCmdlet.SessionState.PSVariable.Set($OutConversionTable, $ConversionTable)
            }
            else {
                # CommandOrigin: Internal
                Set-Variable -Name $OutConversionTable -Value $ConversionTable -Scope 2
            }
        }
    }
}
<#
.SYNOPSIS
Creates new redaction rule.
 
.DESCRIPTION
Creates new redaction rule with regex pattern to look for and NewValue to replace with.
 
.PARAMETER Pattern
Regex pattern
 
.PARAMETER NewValueFunction
Script block to generate new generic data, the result is then put instead of the original value.
This script block can accept at most 1 int parameter with $args[0] or declare variable in param() block
 
.PARAMETER NewValueString
String value to be replaced instead of pattern. The string can contain place holder {0}, and it will be replaced with uniqueness factor.
 
.PARAMETER CommonRule
Predefined rules - patterns and values
 
.EXAMPLE
New-RedactionRule '(?<=\().*(?=\))' 'Process_{0}'
 
.EXAMPLE
Mark '[a-z]' { [long]$p = $args[0]; [char]($p % 26 + 65) }
 
.EXAMPLE
Mark -CommonRule IPV4Address
 
.NOTES
 
#>

Function New-RedactionRule {
    # Factory for redaction rules. Depending on the parameter set it returns a predefined
    # rule from the script-scope CommonRuleTable, a RedactionRuleFunction, or a
    # RedactionRuleString.
    [Diagnostics.CodeAnalysis.SuppressMessageAttribute("PSUseShouldProcessForStateChangingFunctions", "")]
    [Alias('New-SanitizationRule','New-MarkingRule','Mark')] # Usually Single word is an automatic alias for Get-<SingleWord>
    [OutputType([RedactionRule])]
    [CmdletBinding(DefaultParameterSetName = 'CustomFunction')]
    param(
        # Regex pattern with 1 named capturing group at most
        [Parameter(Mandatory = $true, Position = 0, ParameterSetName = 'CustomString')]
        [Parameter(Mandatory = $true, Position = 0, ParameterSetName = 'CustomFunction')]
        [string]$Pattern,
        # Script block that generates the replacement value
        [Parameter(Mandatory = $true, Position = 1, ParameterSetName = 'CustomFunction')]
        [scriptblock]$NewValueFunction,
        # Value can contain {0} so counter value will be added
        [Parameter(Mandatory = $true, Position = 1, ParameterSetName = 'CustomString')]
        [String]$NewValueString,
        # Name of a predefined rule
        [Parameter(Mandatory = $true, Position = 0, ParameterSetName = 'Common')]
        [ValidateSet('IPV4Address')]
        [string]$CommonRule
    )

    # Dispatch on the resolved parameter set; each branch emits exactly one rule object.
    switch ($PSCmdlet.ParameterSetName) {
        'Common' {
            $Script:CommonRuleTable[$CommonRule]
        }
        'CustomFunction' {
            New-Object -TypeName RedactionRuleFunction -ArgumentList $Pattern, $NewValueFunction
        }
        'CustomString' {
            New-Object -TypeName RedactionRuleString -ArgumentList $Pattern, $NewValueString
        }
    }
}

# Script-scope lookup table of predefined rules, keyed by the names accepted by
# New-RedactionRule -CommonRule.
$Script:CommonRuleTable = @{}
$Script:CommonRuleTable['IPV4Address'] = New-RedactionRule -Pattern '\b(\d{1,3}(\.\d{1,3}){3})\b' -NewValueFunction ${Function:Convert-IPValue}

# Disabled candidate rules. NOTE(review): they reference a Generate-IPValue function that
# is not visible in this file - confirm a suitable generator exists before enabling them.
#$Script:CommonRuleTable['IPV6Address'] = New-RedactionRule -Pattern '\b(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))\b' -NewValueFunction ${Function:Generate-IPValue}
#$Script:CommonRuleTable['MACAddress'] = New-RedactionRule -Pattern '\b([0-9A-F]{2}[:-]){5}([0-9A-F]{2})\b' -NewValueFunction ${Function:Generate-IPValue}
#$Script:CommonRuleTable['GUID'] = New-RedactionRule -Pattern '\b[{(]?[0-9A-F]{8}[-]?(?:[0-9A-F]{4}[-]?){3}[0-9A-F]{12}[)}]?\b' -NewValueFunction ${Function:Generate-IPValue}

# Export every function, alias and cmdlet defined by this module.
Export-ModuleMember -Function * -Alias * -Cmdlet *