src/data.ps1
[Diagnostics.CodeAnalysis.SuppressMessageAttribute('PSAvoidUsingPlainTextforPassword', '', Scope = 'Function', Target = 'Import-Excel')] Param() # Nouns with the same singular and plural forms $SameSingularPlural = @( 'accommodation' 'advice' 'alms' 'aircraft' 'aluminum' 'barracks' 'bison' 'binoculars' 'bourgeois' 'breadfruit' 'buffalo' 'cannon' 'caribou' 'chalk' 'chassis' 'chinos' 'clippers' 'clothing' 'cod' 'concrete' 'corps' 'correspondence' 'crossroads' 'data' 'deer' 'doldrums' 'dungarees' 'education' 'eggfruit' 'elk' 'equipment' 'eyeglasses' 'fish' 'flares' 'flour' 'food' 'fruit' 'furniture' 'gallows' 'goldfish' 'grapefruit' 'greenfly' 'grouse' 'haddock' 'halibut' 'head' 'headquarters' 'help' 'homework' 'hovercraft' 'ides' 'information' 'insignia' 'jackfruit' 'jeans' 'knickers' 'knowledge' 'kudos' 'leggings' 'lego' 'luggage' 'mathematics' 'money' 'moose' 'monkfish' 'mullet' 'nailclippers' 'news' 'nitrogen' 'offspring' 'oxygen' 'pants' 'pyjamas' 'passionfruit' 'pike' 'pliers' 'police' 'premises' 'reindeer' 'rendezvous' 'rice' 'salmon' 'scissors' 'series' 'shambles' 'sheep' 'shellfish' 'shorts' 'shrimp' 'smithereens' 'spacecraft' 'species' 'squid' 'staff' 'starfruit' 'statistics' 'stone' 'sugar' 'swine' 'tights' 'tongs' 'traffic' 'trousers' 'trout' 'tuna' 'tweezers' 'wheat' 'whitebait' 'wood' 'you' ) # Nouns with irregular singular/plural forms $Irregular = @{ 'child' = 'children' 'cow' = 'cattle' 'foot' = 'feet' 'goose' = 'geese' 'man' = 'men' 'move' = 'moves' 'person' = 'people' 'radius' = 'radii' 'sex' = 'sexes' 'tooth' = 'teeth' 'woman' = 'women' } function Format-MoneyValue { <# .SYNOPSIS Helper function to create human-readable money (USD) values as strings. .EXAMPLE 42 | Format-MoneyValue # '$42.00' .EXAMPLE 55000123.50 | Format-MoneyValue -Symbol ¥ # '¥55,000,123.50' .EXAMPLE 700 | Format-MoneyValue -Symbol £ -Postfix # '700.00£' #> [CmdletBinding()] [Alias('money')] [OutputType([String])] Param( [Parameter(Mandatory = $True, Position = 0, ValueFromPipeline = $True)] $Value, [String] $Symbol = '$', [Switch] $AsNumber, [Switch] $Postfix ) Process { function Get-Magnitude { Param($Value) [Math]::Log([Math]::Abs($Value), 10) } switch -Wildcard ($Value.GetType()) { 'Int*' { $Sign = [Math]::Sign($Value) $Output = [Math]::Abs($Value).ToString() $OrderOfMagnitude = Get-Magnitude $Value if ($OrderOfMagnitude -gt 3) { $Position = 3 $Length = $Output.Length for ($Index = 1; $Index -le [Math]::Floor($OrderOfMagnitude / 3); $Index++) { $Output = $Output | Invoke-InsertString ',' -At ($Length - $Position) $Position += 3 } } if ($Postfix) { "$(if ($Sign -lt 0) { '-' } else { '' })${Output}.00$Symbol" } else { "$(if ($Sign -lt 0) { '-' } else { '' })$Symbol${Output}.00" } } 'Double' { $Sign = [Math]::Sign($Value) $Output = [Math]::Abs($Value).ToString('#.##') $OrderOfMagnitude = Get-Magnitude $Value if (($Output | ForEach-Object { $_ -split '\.' } | Select-Object -Skip 1).Length -eq 1) { $Output += '0' } if (($Value - [Math]::Truncate($Value)) -ne 0) { if ($OrderOfMagnitude -gt 3) { $Position = 6 $Length = $Output.Length for ($Index = 1; $Index -le [Math]::Floor($OrderOfMagnitude / 3); $Index++) { $Output = $Output | Invoke-InsertString ',' -At ($Length - $Position) $Position += 3 } } if ($Postfix) { "$(if ($Sign -lt 0) { '-' } else { '' })$Output$Symbol" } else { "$(if ($Sign -lt 0) { '-' } else { '' })$Symbol$Output" } } else { ($Value.ToString() -as [Int]) | Format-MoneyValue } } 'String' { $Value = $Value -replace ',', '' $Sign = if (([Regex]'\-\$').Match($Value).Success) { -1 } else { 1 } if (([Regex]'\$').Match($Value).Success) { $Output = (([Regex]'(?<=(\$))[0-9]*\.?[0-9]{0,2}').Match($Value)).Value } else { $Output = (([Regex]'[\-]?[0-9]*\.?[0-9]{0,2}').Match($Value)).Value } $Type = if ($Output.Contains('.')) { [Double] } else { [Int] } $Output = $Sign * ($Output -as $Type) if (-not $AsNumber) { $Output = $Output | Format-MoneyValue } $Output } Default { throw 'Format-MoneyValue only accepts strings and numbers' } } } } function Get-Plural { <# .SYNOPSIS Return plural form of a word .EXAMPLE 'boot' | plural # 'boots' .NOTES Adapted from the PHP library, [Text-Statistics](https://github.com/DaveChild/Text-Statistics) #> [CmdletBinding()] [Alias('plural')] [OutputType([String])] Param( [Parameter(Mandatory = $True, Position = 0, ValueFromPipeline = $True)] [String] $Word ) Begin { $Plural = @( ('(quiz)$', '${1}zes') ('^(ox)$', '${1}en') ('([m|l])ouse$', '${1}ice') ('(matr|ind|vert)[i|e]x$', '${1}ices') ('(x|ch|ss|sh)$', '${1}es') ('([^aeiouy]|qu)y$', '${1}ies') ('(hive)$', '${1}s') ('(?:([^f])fe|([lr])f)$', '${1}${2}ves') ('(shea|lea|loa|thie)f$', '${1}ves') ('sis$', 'ses') ('([ti])um$', '${1}a') ('(tomat|potat|ech|her|vet)o$', '${1}oes') ('(bu)s$', '${1}ses') ('(alias)$', '${1}es') ('(octop)us$', '${1}i') ('(ax|test)is$', '${1}es') ('(us)$', '${1}es') ('s$', 's') ) } Process { switch ($Word.ToLower()) { { $_ -in $SameSingularPlural } { $Word Break } { $_ -in $Irregular.Keys } { $Irregular.$_ Break } { $_ -in $Irregular.Values } { $Word Break } Default { $Result = "${Word}s" $Pairs = Invoke-Chunk $Plural -Size 2 foreach ($Pair in $Pairs) { [Regex]$Re, $PluralVersion = $Pair if ($Word -match $Re) { $Result = $Word -replace $Re, $PluralVersion Break } } $Result } } } } function Get-Singular { <# .SYNOPSIS Return singular form of a word .EXAMPLE 'boots' | singular # 'boot' .NOTES Adapted from the PHP library, [Text-Statistics](https://github.com/DaveChild/Text-Statistics) #> [CmdletBinding()] [Alias('singular')] [OutputType([String])] Param( [Parameter(Mandatory = $True, Position = 0, ValueFromPipeline = $True)] [String] $Word ) Begin { $Singular = @( ('(quiz)zes$', '${1}') ('(matr)ices$', '${1}ix') ('(vert|ind)ices$', '${1}ex') ('^(ox)en$', '${1}') ('(alias)es$', '${1}') ('(octop|vir)i$', '${1}us') ('(cris|ax|test)es$', '${1}is') ('(shoe)s$', '${1}') ('(o)es$', '${1}') ('(bus)es$', '${1}') ('([m|l])ice$', '${1}ouse') ('(x|ch|ss|sh)es$', '${1}') ('(m)ovies$', '${1}ovie') ('(s)eries$', '${1}eries') ('([^aeiouy]|qu)ies$', '${1}y') ('([lr])ves$', '${1}f') ('(tive)s$', '${1}') ('(hive)s$', '${1}') ('(li|wi|kni)ves$', '${1}fe') ('(shea|loa|lea|thie)ves$', '${1}f') ('(^analy)ses$', '${1}sis') ('((a)naly|(b)a|(d)iagno|(p)arenthe|(p)rogno|(s)ynop|(t)he)ses$', '${1}${2}sis') ('([ti])a$', '${1}um') ('(n)ews$', '${1}ews') ('(h|bl)ouses$', '${1}ouse') ('(corpse)s$', '${1}') ('(us)es$', '${1}') ('s$', '') ) } Process { switch ($Word.ToLower()) { { $_ -in $SameSingularPlural } { $Word Break } { $_ -in $Irregular.Keys } { $Word Break } { $_ -in $Irregular.Values } { ($Irregular | Invoke-ObjectInvert).$_ Break } Default { $Result = $Word $Pairs = Invoke-Chunk $Singular -Size 2 foreach ($Pair in $Pairs) { [Regex]$Re, $SingularVersion = $Pair if ($Word -match $Re) { $Result = $Word -replace $Re, $SingularVersion Break } } $Result } } } } function Get-SyllableCount { <# .SYNOPSIS Get number of syllables in an English word (used within Get-Readability function) .EXAMPLE 'hello' | Get-SylallableCount # 2 .NOTES Adapted from Node.js library, [words/syllable](https://github.com/words/syllable#inspiration), which was based on the PHP library, [Text-Statistics](https://github.com/DaveChild/Text-Statistics), which was inspired by the Perl module, [Lingua::EN::Syllable](https://metacpan.org/pod/Lingua::EN::Syllable) #> [CmdletBinding()] [OutputType([Int])] Param( [Parameter(Position = 0, ValueFromPipeline = $True)] [AllowEmptyString()] [String] $Word ) Begin { # Match single syllable pre- and suffixes $Single = [Regex]'^(?:un|fore|ware|none?|out|post|sub|pre|pro|dis|side|some)|(?:ly|less|some|ful|ers?|ness|cians?|ments?|ettes?|villes?|ships?|sides?|ports?|shires?|[gnst]ion(?:ed|s)?)$' # Match double syllable pre- and suffixes $Double = [Regex]'^(?:above|anti|ante|counter|hyper|afore|agri|infra|intra|inter|over|semi|ultra|under|extra|dia|micro|mega|kilo|pico|nano|macro|somer)|(?:fully|berry|woman|women|edly|union|((?:[bcdfghjklmnpqrstvwxz])|[aeiou])ye?ing)$' # Match triple syllabble suffixes $Triple = [Regex]'(creations?|ology|ologist|onomy|onomist)$' # Counted as two, but should be one $SingleSyllabicOne = [Regex]'awe($|d|so)|cia(?:l|$)|tia|cius|cious|[^aeiou]giu|[aeiouy][^aeiouy]ion|iou|sia$|eous$|[oa]gue$|.[^aeiuoycgltdb]{2,}ed$|.ely$|^jua|uai|eau|^busi$|(?:[aeiouy](?:[bcfgklmnprsvwxyz]|ch|dg|g[hn]|lch|l[lv]|mm|nch|n[cgn]|r[bcnsv]|squ|s[chkls]|th)ed$)|(?:[aeiouy](?:[bdfklmnprstvy]|ch|g[hn]|lch|l[lv]|mm|nch|nn|r[nsv]|squ|s[cklst]|th)es$)' $SingleSyllabicTwo = [Regex]'[aeiouy](?:[bcdfgklmnprstvyz]|ch|dg|g[hn]|l[lv]|mm|n[cgns]|r[cnsv]|squ|s[cklst]|th)e$' # Counted as one, but should be two $DoubleSyllabicOne = [Regex]'(?:([^aeiouy])\\1l|[^aeiouy]ie(?:r|s?t)|[aeiouym]bl|eo|ism|asm|thm|dnt|snt|uity|dea|gean|oa|ua|react?|orbed|shred|eings?|[aeiouy]sh?e[rs])$' $DoubleSyllabicTwo = [Regex]'creat(?!u)|[^gq]ua[^auieo]|[aeiou]{3}|^(?:ia|mc|coa[dglx].)|^re(app|es|im|us)|(th|d)eist' $DoubleSyllabicThree = [Regex]'[^aeiou]y[ae]|[^l]lien|riet|dien|iu|io|ii|uen|[aeilotu]real|real[aeilotu]|iell|eo[^aeiou]|[aeiou]y[aeiou]' $DoubleSyllabicFour = [Regex]'[^s]ia' # Nouns with problematic syllables $Problematic = @{ 'abalone' = 4 'abare' = 3 'abbruzzese' = 4 'abed' = 2 'aborigine' = 5 'abruzzese' = 4 'acreage' = 3 'adame' = 3 'adieu' = 2 'adobe' = 3 'anemone' = 4 'anyone' = 3 'apache' = 3 'aphrodite' = 4 'apostrophe' = 4 'ariadne' = 4 'cafe' = 2 'café' = 2 'calliope' = 4 'catastrophe' = 4 'chile' = 2 'chloe' = 2 'circe' = 2 'cliche' = 2 'cliché' = 2 'contrariety' = 4 'coyote' = 3 'daphne' = 2 'epitome' = 4 'eurydice' = 4 'euterpe' = 3 'every' = 2 'everywhere' = 3 'forever' = 3 'gethsemane' = 4 'guacamole' = 4 'hermione' = 4 'hyperbole' = 4 'jesse' = 2 'jukebox' = 2 'karate' = 3 'machete' = 3 'maybe' = 2 'naive' = 2 'newlywed' = 3 'ninety' = 2 'penelope' = 4 'people' = 2 'persephone' = 4 'phoebe' = 2 'pulse' = 1 'queue' = 1 'recipe' = 3 'reptilian' = 4 'resumé' = 2 'riverbed' = 3 'scotia' = 3 'sesame' = 3 'shoreline' = 2 'simile' = 3 'snuffleupagus' = 5 'sometimes' = 2 'syncope' = 3 'tamale' = 3 'waterbed' = 3 'wednesday' = 2 'viceroyship' = 3 'yosemite' = 4 'zoë' = 2 } $NeedToBeFixed = @{ # all counts are (correct - 1) 'ayo' = 2 'australian' = 3 'dionysius' = 5 'disbursement' = 3 'discouragement' = 4 'disenfranchisement' = 5 'disengagement' = 4 'disgraceful' = 3 'diskette' = 2 'displacement' = 3 'distasteful' = 3 'distinctiveness' = 4 'distraction' = 3 'geoffrion' = 4 'mcquaid' = 2 'mcquaide' = 2 'mcquaig' = 2 'mcquain' = 2 'nonbusiness' = 3 'nonetheless' = 3 'nonmanagement' = 4 'outplacement' = 3 'outrageously' = 4 'postponement' = 3 'preemption' = 3 'preignition' = 4 'preinvasion' = 4 'preisler' = 3 'preoccupation' = 5 'prevette' = 2 'probusiness' = 3 'procurement' = 3 'pronouncement' = 3 'sidewater' = 3 'sidewinder' = 3 'ungerer' = 3 } $Apostrophe = [Regex]"['’]" $NonAlphabetic = [Regex]'[^a-z]' $Count = 0 } Process { $Syllables = { Param($Word) switch ($Word) { { $_.Length -eq 0 } { 0 Break } { $_.Length -in 1, 2 } { 1 Break } { $_ -in $Problematic.Keys } { $Problematic.$_ Break } { (Get-Singular $_) -in $Problematic.Keys } { $Word = (Get-Singular $_) $Problematic.$Word Break } { $_ -in $NeedToBeFixed.Keys } { $NeedToBeFixed.$_ Break } { (Get-Singular $_) -in $NeedToBeFixed.Keys } { $Word = Get-Singular $_ $NeedToBeFixed.$Word Break } Default { $Count += (3 * ($Word | Select-String -Pattern $Triple).Matches.Value.Count) $Word = $Word -replace $Triple, '' $Count += (2 * ($Word | Select-String -Pattern $Double).Matches.Value.Count) $Word = $Word -replace $Double, '' $Count += (1 * ($Word | Select-String -Pattern $Single).Matches.Value.Count) $Word = $Word -replace $Single, '' $Count -= ($Word | Select-String -Pattern $SingleSyllabicOne).Matches.Value.Count $Count -= ($Word | Select-String -Pattern $SingleSyllabicTwo).Matches.Value.Count $Count += ($Word | Select-String -Pattern $DoubleSyllabicOne).Matches.Value.Count $Count += ($Word | Select-String -Pattern $DoubleSyllabicTwo).Matches.Value.Count $Count += ($Word | Select-String -Pattern $DoubleSyllabicThree).Matches.Value.Count $Count += ($Word | Select-String -Pattern $DoubleSyllabicFour).Matches.Value.Count $Count += ($Word -split [Regex]'[^aeiouy]+' | Where-Object { $_ -ne '' }).Count $Count } } } $TotalSyllables = 0 $Parts = (($Word -replace $Apostrophe, '') -split '\b') foreach ($Part in $Parts) { $Part = $Part.ToLower() -replace $NonAlphabetic, '' $TotalSyllables += (& $Syllables $Part) } $TotalSyllables } } function Import-Excel { <# .SYNOPSIS Import the rows of an Excel worksheet as a 2-dimensional array .PARAMETER ColumnHeaders Custom values to be used as header names. Must have same count as Excel data columns. .PARAMETER FirstRowHeaders Treat first row as headers. Exclude first row cells from Cells and Rows in output. Note: When an empty cell is encountered, a placeholder will be used of the form, column<COLUMN NUMBER> .PARAMETER Peek Return first row of data only. Useful for quickly identifying the shape of the data without importing the entire file. #> [CmdletBinding()] [OutputType([System.Collections.Hashtable])] Param( [Parameter(Mandatory = $True)] [String] $Path, [String] $WorksheetName, [Array] $ColumnHeaders, [String] $Password, [Switch] $FirstRowHeaders, [String] $EmptyValue = 'EMPTY', [Switch] $ShowProgress, [Switch] $Peek ) $FileName = Get-StringPath $Path $Excel = New-Object -ComObject 'Excel.Application' $Excel.Visible = $False if ($ShowProgress) { Write-Progress -Activity 'Importing Excel data' -Status "Loading $FileName" } $Workbook = if (-not $Password) { $Excel.workbooks.open($FileName) } else { $Excel.workbooks.open($FileName, 0, 0, $True, $Password) } $Worksheet = if ($WorksheetName) { $Workbook.Worksheets.Item($WorksheetName) } else { $Workbook.Worksheets(1) } $RowCount = if ($Peek) { 1 } else { $Worksheet.UsedRange.Rows.Count } $ColumnCount = $Worksheet.UsedRange.Columns.Count $StartIndex = if ($FirstRowHeaders) { 2 } else { 1 } $Headers = if ($FirstRowHeaders) { $RowCount-- 1..$ColumnCount | ForEach-Object { $Name = $Worksheet.Cells.Item(1, $_).Value2 if ($Null -eq $Name) { "Column${_}" } else { $Name } } } elseif ($ColumnHeaders.Count -eq $ColumnCount) { $ColumnHeaders } else { 1..$ColumnCount | ForEach-Object { "Column${_}" } } $Rows = New-Object 'System.Collections.ArrayList' $Cells = @() for ($RowIndex = $StartIndex; $RowIndex -le $RowCount; $RowIndex++) { if ($ShowProgress) { Write-Progress -Activity 'Importing Excel data' -Status "Processing row ($RowIndex of ${RowCount})" -PercentComplete (($RowIndex / $RowCount) * 100) } $Row = @{} for ($ColumnIndex = 1; $ColumnIndex -le $ColumnCount; $ColumnIndex++) { $Value = $Worksheet.Cells.Item($RowIndex, $ColumnIndex).Value2 $Element = if ($Null -eq $Value) { $EmptyValue } else { $Value } $Row[$Headers[$ColumnIndex - 1]] = $Element $Cells += $Element } [Void]$Rows.Add($Row) } if ($ShowProgress) { Write-Progress -Activity 'Importing Excel data' -Completed } $Workbook.Close() $Excel.Quit() @{ Size = @($RowCount, $ColumnCount) Headers = $Headers Cells = $Cells Rows = $Rows } } function Import-Raw { <# .SYNOPSIS Import large files as lines of raw text using StreamReader Note: For large files, this function can be 2-10 times faster than Get-Content or Import-Csv .PARAMETER Transform Function that will be applied to every line. .PARAMETER Peek Return only the first line. .EXAMPLE Import-Raw -File 'data.csv' -Transform { Param($Line) $Line -split ',' } .EXAMPLE Import-Raw 'data.csv' -Peek #> [CmdletBinding()] Param( [Parameter(Mandatory = $True, Position = 0)] [ValidateScript({ Test-Path $_ })] [String] $File, [Parameter(Position = 1)] [ScriptBlock] $Transform, [Switch] $Peek ) $Stream = New-Object -Type System.IO.StreamReader -ArgumentList (Get-Item $File) do { $Line = $Stream.ReadLine() if ($Transform) { & $Transform $Line } else { $Line } } while (-not $Peek -and $Stream.Peek() -ge 0) [Void]$Stream.Dispose() } |