# pf-regex.ps1
# ---------------------------------------------------------------------------
# Shared regex fragments. They are published as globals when the file is
# loaded so the pattern builders below (e.g. join-RegEx_Spaces) can use them.
# ---------------------------------------------------------------------------
#function Set-RegEx_Common_Patterns{
    $global:regex_spaces = '[^\S\n\r\f]'
    $global:regex_spaces_optional = $regex_spaces + '*?'
    $global:regex_spaces_required = $regex_spaces + '+?'
    $global:regex_LineStart = '(?<=^\s*)'
    $global:regex_LineToEnd = '.*?$'
    $global:regex_AnyCharMultiline = '(?>.|\n|\r)*?'
#}
#Set-RegEx_Common_Patterns

# Converts each piped pattern string into a [regex] built with the module's
# standard options (MultiLine, ExplicitCapture, IgnorePatternWhitespace,
# CultureInvariant, plus IgnoreCase unless -CaseSensitive is given).
# Piped values that are not strings are passed through unchanged.
function Get-RegEx([switch]$CaseSensitive) {
    begin {
        $regex_baseOptions = "MultiLine, ExplicitCapture, IgnorePatternWhitespace, CultureInvariant"
        $options = $regex_baseOptions
        if (-not $CaseSensitive.IsPresent) {
            $options += ", IgnoreCase"
        }
        $options = [System.Text.RegularExpressions.RegexOptions]$options
    }
    process {
        $pattern = $_
        $regex = if ($pattern -is [string]) { new-object regex($_, $options) } else { $pattern }
        return $regex
    }
}

# Runs $pattern against each piped value and returns the matches.
# Piped Capture objects (Match/Group/Capture) are passed through as they are.
function Get-RegEx_Match($pattern) {
    begin {
        $pattern = $pattern | Get-RegEx
    }
    process {
        $regexMatches = if ($_ -is [System.Text.RegularExpressions.Capture]) { $_ } else { $pattern.Matches($_) }
        return $regexMatches
    }
}

# Test helper: matches each piped input against $pattern and asserts on the result.
#   -expected       string that the first match value must equal
#   -expectedCount  number of matches required (checked only when greater than 0)
#   -EqualAsInput   expect the first match to equal the trimmed input; this is
#                   also the default when no -expected is given and
#                   -expectedCount is absent or 1
#   -PassThrough    emit the matches after asserting
function Test-RegEx($pattern, $expected, $expectedCount, [Switch]$EqualAsInput, [Switch]$PassThrough) {
    begin {
        $PassThrough = $PassThrough.IsPresent
        $EqualAsInput = $EqualAsInput.IsPresent
        if (! $expected -and ( !$expectedCount -or $expectedCount -eq 1)) {
            $EqualAsInput = $true
        }
        if (Test-Path Variable:PSDebugContext) {
            # Debugger attached: copy the pattern to the clipboard for inspection.
            $pattern | clip
        }
        $pattern = $pattern | Get-RegEx
    }
    process {
        $matchesResult = $_ | Get-RegEx_Match $pattern
        if ($EqualAsInput) {
            $expected = $_.Trim()
        }
        if ($expectedCount -gt 0) {
            $matchesResult.Count | Assert -eq $expectedCount
        }
        if ($expected -is [string]) {
            $actual = $matchesResult[0].Value
            $actual | assert -eq $expected
        }
        if ($expected -is [hashtable]) {
            # Hashtable expectations are accepted but not verified (not implemented).
        }
        if ($PassThrough) {
            $matchesResult
        }
    }
}

# Regex-escapes each piped string; double quotes are emitted as \x22.
function Join-RegEx_Escaped {
    Process {
        [RegEx]::Escape($_).Replace('"', '\x22')
    }
}

# Builds the quantifier suffix for the range $min..$max.
# A $max of [int]::MaxValue (the default) or '*' means "no upper bound".
# Returns '' for exactly once, '?', '*', '+', '{n}', '{n,m}', or '{n,}' for an
# open-ended range whose minimum is neither 0 nor 1.
function Join-RegEx_Quantifier($min = 0, $max = [int]::MaxValue) {
    $maxres = if ($max -eq [int]::MaxValue) { '*' } else { $max }
    # Compare as text so an integer $maxres is never coerced against '*'.
    $openEnded = "$maxres" -eq '*'

    if ($openEnded) {
        if ($min -eq 0) { return "*" }
        if ($min -eq 1) { return "+" }
        # Previously fell through to "{min,2147483647}" or the invalid "{min,*}".
        return "{$min,}"
    }

    if ($min -eq 1 -and $maxres -eq 1) { return "" }
    if ($min -eq 0 -and $maxres -eq 1) { return "?" }
    if ($min -eq $max) { return "{$max}" }
    return "{$min,$max}"
}
function Join-RegEx_Quantifier:::Test() {
    Join-RegEx_Quantifier -min 1 -max 1 | assert -eq ''
    Join-RegEx_Quantifier -min 0 -max 1 | assert -eq '?'
    Join-RegEx_Quantifier -min 2 -max 2 | assert -eq '{2}'
    Join-RegEx_Quantifier -min 1 -max '*' | assert -eq '+'
    Join-RegEx_Quantifier -min 1 | assert -eq '+'
    Join-RegEx_Quantifier -min 0 | assert -eq '*'
    Join-RegEx_Quantifier -min 2 -max 3 | assert -eq '{2,3}'
    Join-RegEx_Quantifier -min 2 | assert -eq '{2,}'
    Join-RegEx_Quantifier -min 2 -max '*' | assert -eq '{2,}'
}

# Sanitises each piped string for use as a group name: every character outside
# [\w\d] (and '*' when -wildcard is set) becomes '_', then leading underscores
# are trimmed.
function Get-RegEx_GroupName_Valid([switch]$wildcard) {
    begin {
        $pattern = '\w\d'
        if ($wildcard.IsPresent) {
            $pattern += '*'
        }
    }
    process {
        ( $_ -replace "[^$pattern]", '_' ).TrimStart('_')
    }
}
function Get-RegEx_GroupName_Valid:::Test {
    '.123/456' | Get-RegEx_GroupName_Valid | assert '123_456'
    $null | Get-RegEx_GroupName_Valid | assert ''
    'abc*' | Get-RegEx_GroupName_Valid -wildcard | assert 'abc*'
}

# Wraps each piped pattern in a group and appends the quantifier for $min/$max.
#   -group   name for a named group; empty/absent produces a non-capturing group
#   -atomic  additionally wraps the result in an atomic group (?>...)
function Join-RegEx_Group($group, $min = 1, $max = 1, [switch]$atomic) {
    Begin {
        $quantifier = Join-RegEx_Quantifier -min $min -max $max
        $group = $group | Get-RegEx_GroupName_Valid
        $resgroup = if ($group) { "<$group>" } else { ':' }
    }
    Process {
        if ($atomic.IsPresent) {
            # An unnamed, unquantified atomic group needs no inner (?:...) wrapper.
            $result = if ($resgroup -eq ':' -and -not $quantifier) { $_ } else { "(?$resgroup$_)" + $quantifier }
            return "(?>$result)"
        }
        else {
            $result = "(?$resgroup$_)" + $quantifier
            return $result
        }
    }
}
function Join-RegEx_Group:::test {
    "a" | Join-RegEx_Group | assert -eq '(?:a)'
    "a" | Join-RegEx_Group -group 'g' | assert -eq '(?<g>a)'
    "a" | Join-RegEx_Group -group '' | assert -eq '(?:a)'
    "a" | Join-RegEx_Group -atomic | assert -eq '(?>a)'
    "a" | Join-RegEx_Group -group 'g' -atomic | assert -eq '(?>(?<g>a))'
    "a" | Join-RegEx_Group -group 'g' -atomic -min 0 | assert -eq '(?>(?<g>a)?)'
    "a" | Join-RegEx_Group -atomic -min 0 | assert -eq '(?>(?:a)?)'
}

# Surrounds each piped inner pattern with the escaped $open and $close
# delimiters; the inner pattern is captured in the group named $group.
function Join-RegEx_Block($group, $open, $close) {
    begin {
        $openPattern = $open | Join-RegEx_Escaped
        $closePattern = $close | Join-RegEx_Escaped
    }
    process {
        $inPattern = $_
        $pattern = ( $openPattern | Join-RegEx_Group ) +
            ( $inPattern | Join-RegEx_Group -group $group ) +
            ( $closePattern | Join-RegEx_Group )
        return $pattern
    }
}
function Join-RegEx_Block:::Test {
    $pattern = '.*?' | Join-RegEx_Block -group 'match' -open '<block>' -close '</block>'
    $pattern | clip
    $actual = '012 <block>abc</block> 345' | Select-RegEx_Values -regexPattern $pattern -groupName 'match'
    $actual | assert -eq 'abc'
}

# Builds a balancing-group pattern for nested $open/$close blocks.
# The piped value is the pattern allowed between delimiters; when it is empty,
# a character class excluding the escaped delimiters is used instead.
# Open delimiters are pushed on the "<group>Open" stack; content is captured
# in $group.
function Join-RegEx_Block_Recursive($group, $open, $close) {
    # https://www.regular-expressions.info/balancing.html
    # ^m*(?>(?>(?'open'o)m*)+(?>(?'-open'c)m*)+)+(?(open)(?!))$
    begin {
        $groupOpen = $group + "Open"
        $groupContent = $group
        $openPattern = $open | Join-RegEx_Escaped
        $closePattern = $close | Join-RegEx_Escaped
    }
    process {
        $content = $_
        $contentPattern = if ($content) { $content } else { '[^' + $openPattern + $closePattern + ']*' }
        $pattern = "$contentPattern(?>(?>(?<$groupOpen>$openPattern)$contentPattern)+(?>(?<$groupContent-$groupOpen>$closePattern)$contentPattern)+)+(?($groupOpen)(?!))"
        return $pattern
    }
}
function Join-RegEx_Block_Recursive:::test {
    $testInput = "1 (2) 3 (4 (5)) (6 7 (8) 9) 0"
    # $pattern = '\s*\d+\s*' | Join-RegEx_Group | Join-RegEx_Block_Recursive -group 'gp' -open '(' -close ')'
    $pattern = Join-RegEx_Block_Recursive -group 'gp' -open '(' -close ')'
    $testInput | Select-RegEx_Values -regexPattern $pattern -groupName 'gp' |
        assert @('2', '5', '4 (5)', '8', '6 7 (8) 9')
}

# Concatenates the piped items into one string:
#   $open + item[+ $separator + item ...] + $close,
# with every item wrapped in $beforeitem / $afteritem.
function Join-RegEx($separator = '', $open = '', $close = '', $beforeitem = '', $afteritem = '') {
    Begin {
        $first = $true
        $result = $open
    }
    Process {
        if ($first) {
            $first = $false
        }
        else {
            $result += $separator
        }
        $result += $beforeitem + $_ + $afteritem
    }
    End {
        $result += $close
        $result
    }
}

# Like Join-RegEx, but with '|' as the default separator (alternation).
# When $group is given, the joined pattern is wrapped in that named group.
function Join-RegEx_Or($open, $close, $group = '', $beforeitem = '', $afteritem = '', $separator = '|') {
    Begin {
        $first = $true
        $result = $open
    }
    Process {
        if ($first) {
            $first = $false
        }
        else {
            $result += $separator
        }
        $result += $beforeitem + $_ + $afteritem
    }
    End {
        $result += $close
        if ($group) {
            $result = $result | Join-RegEx_Group -group $group
        }
        $result
    }
}
function Join-RegEx_Or:::test() {
    @('a', 'b') | Join-RegEx_Or | Assert -eq 'a|b'
    @('a', 'b') | Join-RegEx_Or -beforeitem '[' -afteritem ']' | Assert -eq '[a]|[b]'
    @('a', 'b') | Join-RegEx_Or -open '(' -close ')' | Assert -eq '(a|b)'
}

# Joins the piped patterns with '|', surrounding each one with optional-space
# fragments. The "#Spaces" markers are regex comments terminated by the
# following newline (the module compiles patterns with IgnorePatternWhitespace).
function join-RegEx_Spaces {
    Begin {
        $first = $true
        $separator = "|"
        $beforeitem = $regex_spaces_optional + "#Spaces`n"
        $afteritem = $regex_spaces_optional + "#Spaces`n"
        $result = ""
    }
    Process {
        if ($first) {
            $first = $false
        }
        else {
            $result += $separator
        }
        $result += $beforeitem + $_ + $afteritem
    }
    End {
        $result
    }
}
function join-RegEx_Spaces:::test {
    $pattern = @( 'a', 'b', 'c') | join-RegEx_Spaces | Get-RegEx
    "a b c" | Test-RegEx -pattern $pattern -expectedCount 3
}

# Returns an atomic pattern for a double-quoted string whose content is
# captured (lazily) in the group named $gname.
function Get-RegEx_String($gname = 'string') {
    return ( '"' | Join-RegEx_Escaped ) + "(?<$gname>.*?)" + ( '"' | Join-RegEx_Escaped ) |
        Join-RegEx_Group -atomic
}
function Get-RegEx_String:::Test {
    $gname = 'str'
    $regex = Get-RegEx_String -gname $gname
    '012"34567890' | Select-RegEx_Values -regex $regex -groupName $gname | assert -eq $null
    '012"345"67890' | Select-RegEx_Values -regex $regex -groupName $gname | assert -eq "345"
    '012"345"67"8"90' | Select-RegEx_Values -regex $regex -groupName $gname | assert -eq @("345", "8")
}

# Applies the -match operator to each piped value (converted to string) and,
# on success, returns an object whose properties are the entries of $Matches.
# Null or empty input produces no output.
function Select-RegEx {
    [CmdLetBinding()]
    param(
        [Parameter(Mandatory = $true)]
        [String]$regex,
        [Parameter(Mandatory = $true, ValueFromPipeline = $true)]
        $value
    )
    process {
        if ( [String]::IsNullOrEmpty($value) ) {
            return
        }
        $value = $value.ToString()
        $match = $value -match $regex
        if ( $match ) {
            return New-Object PsObject -Property $Matches
        }
    }
}
function Select-RegEx:::test {
    $testInput = "at cmd, File: line 20"
    $result = $testInput | Select-RegEx -regex 'at (?<cmd>.+), (?<file>.+): line (?<line>\d+)'
    # $result.line | assert 20
    $prop = '0'
    $expected = [PSCustomObject]@{ cmd = 'cmd'; line = 20; file = 'File'; $prop = $testInput }
    ($result | ConvertFrom-PSObject_ToHashtable) | assert ($expected | ConvertFrom-PSObject_ToHashtable)
}

# Converts each piped regex match into a PSCustomObject keyed by group name;
# each value holds the capture values of that group.
#   -fields  group names to include (all successful named groups when $null).
#            The pseudo-fields '#match_char_index' and '#match_length' add the
#            match's Index and Length.
function Select-RegEx_object( $fields = $null) {
    process {
        $regexMatches = $_
        foreach ($match in $regexMatches) {
            $hash = [Ordered]@{}
            $matchedGroups = $match.Groups | Where-Object { $_.Success -and $_.Name }
            if ($null -eq $fields) {
                foreach ($group in $matchedGroups) {
                    #$value = $group.value
                    $value = $group.Captures | ForEach-Object { $_.Value }
                    $hash.Add($group.Name, $value)
                }
            }
            foreach ($field in $fields) {
                $group = $matchedGroups | Where-Object { $_.name -eq $field }
                if ($group) {
                    #$value = $group.value
                    $value = $group.Captures | ForEach-Object { $_.Value }
                    $hash.Add($group.Name, $value)
                }
            }
            if ('#match_char_index' -iin $fields) {
                $hash.Add('#match_char_index', $match.Index)
            }
            if ('#match_length' -iin $fields) {
                $hash.Add('#match_length', $match.Length)
            }
            # Emitted without 'return' so that every match yields an object.
            [PSCustomObject]$hash
        }
    }
}

# Returns the groups whose name is -like $groupName from the matches of
# $regexPattern on each piped value; emits nothing when there is no match.
function Select-RegEx_Groups ( $regexPattern, $groupName ) {
    begin {
        $regex = $regexPattern | Get-RegEx
        $groupName = $groupName | Get-RegEx_GroupName_Valid -wildcard
    }
    process {
        $regexMatches = $_ | Get-RegEx_Match $regex
        if ( -not $regexMatches.Success ) {
            return
        }
        $groups = $regexMatches.Groups | Where-Object { $_.name -like $groupName }
        return $groups
    }
}

# Returns the captures of the groups selected by Select-RegEx_Groups.
function Select-RegEx_Captures ( $regexPattern, $groupName ) {
    process {
        $groups = $_ | Select-RegEx_Groups $regexPattern $groupName
        return $groups.Captures
    }
}

# Returns the captured values for $groupName, or $default when nothing was captured.
function Select-RegEx_Values ( $regexPattern, $groupName, $default = $null ) {
    process {
        # Select-RegEx_Captures has only two parameters; $default is applied here.
        $captures = $_ | Select-RegEx_Captures $regexPattern $groupName
        if ( -not $captures ) {
            return $default
        }
        return $captures.Value
    }
}
function Select-RegEx_Values:::Test {
    "ABC_Commit" | Select-RegEx_Values -regexPattern "(?<Package>.+)_(?<buildname>.+)" -groupName "Package" |
        assert 'ABC'
    "ABCCommit" | Select-RegEx_Values -regexPattern "(?<Package>.+)_(?<buildname>.+)" -groupName "Packages" -default "NoMatch" |
        assert "NoMatch"
}

# Builds a regex comment ("#<name>") from the command name found at call-stack
# index $frame_Index; everything up to and including the first '-' of the
# command name is removed. -prepend surrounds the comment with newlines.
function Get-RegEx_Comment($frame_Index = 1, [switch]$prepend ) {
    $frame = (Get-PSCallStack)[$frame_Index]
    $result = $frame.Command
    $result = "#" + ($result -replace '.*?-(.*)', '$1')
    if ($prepend.IsPresent) {
        $result = "`n" + $result + "`n"
    }
    return $result
}
function Get-RegEx_Comment:::Test {
    function Get-frame2 {
        Get-RegEx_Comment
    }
    function Get-Frame1 {
        Get-frame2
    }
    Get-Frame1 | assert "#frame2"
}

# Concatenates the piped pattern fragments, prefixed (unless -NoComment) with
# a regex comment naming the calling function.
function Join-RegEx_Comment ([switch]$NoComment) {
    begin {
        # Frame 2 is the caller of Join-RegEx_Comment
        # (0 = Get-RegEx_Comment, 1 = this function).
        $comment = Get-RegEx_Comment -frame_Index 2 -prepend
        $items = if ($NoComment.IsPresent) { @() } else { @($comment) }
    }
    process {
        $items += $_
    }
    end {
        $result = $items | Join-RegEx
        if (Test-Path Variable:PSDebugContext ) # Debugger Attached
        {
            $result | clip
        }
        return $result
    }
}
function Join-RegEx_Comment:::Test {
    @("a", "b") | Join-RegEx_Comment | assert "`n#RegEx_Comment:::Test`nab"
}

# Passes through the piped matches in which a group whose name is -like
# $groupName succeeded and, when $capture is given, one of those groups has a
# capture whose value is -like $capture.
function Search-RegEx_Match_WithGroup($groupName, $capture = $null) {
    begin {
        $groupName = $groupName | Get-RegEx_GroupName_Valid -wildcard
    }
    process {
        $regexMatch = $_
        # Keep only successful groups. Testing '.Success' on several groups at
        # once yields an array, which is truthy even when every element is $false.
        $group = @($regexMatch.Groups | Where-Object { $_.name -like $groupName -and $_.Success })
        if ($group.Count -gt 0) {
            if ($null -eq $capture) {
                $regexMatch
            }
            else {
                $firstCapture = $group.Captures | Where-Object { $_.Value -like $capture } | Select-Object -First 1
                if ($firstCapture) {
                    $regexMatch
                }
            }
        }
    }
}
function Search-RegEx_Match_WithGroup:::test {
    #arrange
    $testinput = ' @abc123@ @def123@ @abc@ @123@ '
    $pattern = '@(?>(?<lower>[a-z]+)|(?<upper>[A-Z]+)|(?<number>\d+))+@'
    $regex = $pattern | Get-RegEx -CaseSensitive
    $regexMatches = $testinput | Get-RegEx_Match $regex
    #act/assert
    $regexMatches | Search-RegEx_Match_WithGroup -groupName 'lower' | ForEach-Object { $_.Value } |
        assert @('@abc123@', '@def123@', '@abc@')
    #act/assert
    $regexMatches | Search-RegEx_Match_WithGroup -groupName 'lower' -capture 'def' | ForEach-Object { $_.Value } |
        assert @('@def123@')
}

# Inverse of Search-RegEx_Match_WithGroup: passes through the piped matches in
# which no group -like $groupName succeeded or, when $capture is given, none of
# the successful groups has a capture whose value is -like $capture.
function Search-RegEx_Match_WithOutGroup($groupName, $capture = $null) {
    begin {
        $groupName = $groupName | Get-RegEx_GroupName_Valid -wildcard
    }
    process {
        $regexMatch = $_
        # Same filtering as Search-RegEx_Match_WithGroup: '-not $group.Success'
        # is always false for two or more groups, so count successful groups.
        $group = @($regexMatch.Groups | Where-Object { $_.name -like $groupName -and $_.Success })
        if ($group.Count -eq 0) {
            $regexMatch
        }
        else {
            if ($null -eq $capture) {
                return
            }
            else {
                $firstCapture = $group.Captures | Where-Object { $_.Value -like $capture } | Select-Object -First 1
                if (-not $firstCapture) {
                    $regexMatch
                }
            }
        }
    }
}
function Search-RegEx_Match_WithOutGroup:::test {
    #arrange
    $testinput = ' @abc123@ @def123@ @abc@ @123@ '
    $pattern = '@(?>(?<lower>[a-z]+)|(?<upper>[A-Z]+)|(?<number>\d+))+@'
    $regex = $pattern | Get-RegEx -CaseSensitive
    $regexMatches = $testinput | Get-RegEx_Match $regex
    #act/assert
    $regexMatches | Search-RegEx_Match_WithOutGroup -groupName 'lower' | ForEach-Object { $_.Value } |
        assert @('@123@')
    #act/assert
    $regexMatches | Search-RegEx_Match_WithOutGroup -groupName 'lower' -capture 'def' | ForEach-Object { $_.Value } |
        assert @('@abc123@', '@abc@', '@123@')
}