MeasureTrace.Database.psm1

<#
  Written and shared by Microsoft employee Matthew Reynolds in the spirit of "Small OSS libraries, tool, and sample code" OSS policy
  MIT license https://github.com/MatthewMWR/MeasureTrace/blob/master/LICENSE
#>


Set-StrictMode -Version Latest

<#
.Synopsis
   Adds a given Measured Trace to a database
.DESCRIPTION
   Typically used indirectly via call to Measure-MtDbNewTracesFromFolder
.EXAMPLE
   dir c:\MyTracesFolder | Measure-Trace | Add-MtdbTrace -ConnectionString 'server=localhost;database=MyTraces;integrated security=SSPI'
#>

function Add-MtDbTrace {
    param(
        [Parameter(Mandatory=$true)]
        [string] $ConnectionString
        ,
        [Parameter(Mandatory=$true, ValueFromPipeline=$true)]
        [MeasureTrace.TraceModel.Trace[]]$MeasuredTrace
    )
    begin{
        $repository = Connect-MtDbRepository -ConnectionString $ConnectionString
    }
    process{
        foreach($trace in $MeasuredTrace){
            $repository.AddTraceWithMeasurements($trace)
            $repository.SaveChanges()
        }
    }
    end{
        $repository.Dispose()
    }
}

<#
.Synopsis
   Creates a connection to the MeasureTrace database, attempting to provision as needed
.DESCRIPTION
   Typically used indirectly via call to Measure-MtDbNewTracesFromFolder
.EXAMPLE
   Connect-MtDbRepository -ConnectionString 'server=localhost;database=MyTraces;integrated security=SSPI'
#>

function Connect-MtDbRepository {
    param(
        [Parameter(Mandatory=$true)]
        [string]$ConnectionString
    )
    $r = [MeasureTrace.Database]::Connect($ConnectionString)
    $r
}

<#
.Synopsis
   Determine whether a specific trace file is already represented in a DB
.DESCRIPTION
   Typically used indirectly via call to Measure-MtDbNewTracesFromFolder
.EXAMPLE
   Compare-MtDbTraceFileStatus -Path d:\MyEtlTraces -ConnectionString 'server=localhost;database=MyTraces;integrated security=SSPI'
#>

function Compare-MtDbTraceFileStatus {
    param(
        ## Path to a trace file
        [Parameter(ValueFromPipelineByPropertyName=$true,Mandatory=$true)]
        [Alias('Path')]
        [string[]]$FullName,
        ## For connection to database. See Example section
        [string]$ConnectionString
    )
    begin{
        $repo = Connect-MtDbRepository -ConnectionString $ConnectionString
    }
    process{
        foreach($pathX in $FullName){
            $packageFileName = Split-Path -Path $pathX -Leaf
            [pscustomobject]@{
                PackageFileNameFull = $pathX
                PackageFIleName = $packageFileName
                IsInDatabase = @($repo.GetTrace( $packageFileName )).Count -as [bool]
            }
        }
    }
}

<#
.Synopsis
   Determine which traces in a folder are already represented in a DB
.DESCRIPTION
   Compares a given folder of traces against a database of parsed traces.
   Returns an object per trace which describes the status. Typically used
   indirectly via call to Measure-MtDbNewTracesFromFolder
.EXAMPLE
   Compare-MtDbFolderVsRepository -Path d:\MyEtlTraces -ConnectionString 'server=localhost;database=MyTraces;integrated security=SSPI'
#>

function Compare-MtDbFolderVsRepository{
    param(
        ## Path to a folder containing ETL traces
        [Parameter(ValueFromPipelineByPropertyName=$true,Mandatory=$true)]
        [Alias('Path')]
        [string[]]$FullName,
        ## For connection to database. See Example section
        [string]$ConnectionString,
        [string[]]$Filter = @("*.etl","*.zip")
    )
    
    foreach($folder in $FullName){
        foreach($filterX in $Filter){
            Get-ChildItem -Path $folder -Filter $filterX -Recurse -File | Compare-MtDbTraceFileStatus -ConnectionString $ConnectionString
        }
    }
}

<#
.Synopsis
   Process traces from a folder into MeasuredTrace representation in a database
.DESCRIPTION
   Compares a given folder of traces against a database of parsed traces. For traces which are not yet
   reflected in the db it attempts to Measure them and export Measurements to the database
.EXAMPLE
   Measure-MtDbNewTracesFromFolder -Path d:\MyEtlTraces -ConnectionString 'server=localhost;database=MyTraces;integrated security=SSPI'
.EXAMPLE
   Measure-MtDbNewTracesFromFolder -Path d:\MyEtlTraces -ConnectionString 'server=localhost;database=MyTraces;integrated security=SSPI' -MaxCount 100 -Parallel
.EXAMPLE
    ## Continuous ingestion as traces arrive
    while( $true ){
        Measure-MtDbNewTracesFromFolder -Path d:\MyEtlTraces -ConnectionString 'server=localhost;database=MyTraces;integrated security=SSPI' -MaxCount 100 -Parallel
        Start-Sleep -Seconds 60
    }
#>

function Measure-MtDbNewTracesFromFolder{
    param(
        ## Path to a folder containing ETL traces
        [Parameter(ValueFromPipelineByPropertyName=$true,Mandatory=$true)]
        [Alias('Path')]
        [string[]]$FullName,
        ## For connection to database. See Example section
        [string]$ConnectionString = 'server=(localdb)\MsSqlLocalDb;database=MeasureTrace;integrated security=true',
        ## Max number of new traces to process on this pass
        [int]$MaxCount = 50,
        ## Enables parallel processing via RSJobs
        [switch]$Parallel,
        [string[]]$Filter = @("*.etl","*.zip")
    )
    begin{
        $batchLabel = [Guid]::NewGuid().ToString()
        $parallelThrottle = ([Environment]::ProcessorCount * 1.5) -as [int]
    }
    process{
        Compare-MtDbFolderVsRepository -Path $FullName -ConnectionString $ConnectionString -Filter $Filter |
          Where-Object {-not $_.IsInDatabase} | Select-Object -First $MaxCount |
          Foreach-Object{
            if(-not $Parallel){
                Invoke-MtDbMeasureTraceWithFallbackToEmptyTrace -PackageFileNameFull $_.PackageFileNameFull | Add-MtDbTrace -ConnectionString $ConnectionString
            }
            else{
                Start-RSJob -ModulesToImport "MeasureTrace.Database" -Throttle $parallelThrottle -Batch $batchLabel -ScriptBlock {
                    Invoke-MtDbMeasureTraceWithFallbackToEmptyTrace -PackageFileNameFull $args[0].PackageFileNameFull | Add-MtDbTrace -ConnectionString $args[1]
                } -ArgumentList $_,$ConnectionString
            }
          }
    }
    end{
        Get-RSJob -Batch $batchLabel | Wait-RSJob -ShowProgress -Timeout ([Math]::Sqrt($MaxCount) * 30)
        Get-RSJob -Batch $batchLabel | Receive-RSJob
    }
}


function Invoke-MtDbMeasureTraceWithFallbackToEmptyTrace {
    param( $PackageFileNameFull )
    $mt = $null
    $errorText = ""
    try{
        $mt = Measure-Trace -Path $PackageFileNameFull
    }
    catch{
        Write-Verbose $_
        $errorText = $_.ToString()
    }
    if( $mt -eq $null ){
        $mt = New-Object -TypeName MeasureTrace.TraceModel.Trace
        $mt.PackageFileNameFull = $PackageFileNameFull
        $mt.PackageFileName = Split-Path $PackageFileNameFull -Leaf
        $err = New-Object -TypeName MeasureTrace.TraceModel.TraceAttribute
        $err.Name = "MeasureTraceError"
        $err.StringValue = $errorText
        $mt.AddMeasurement($err)
    }
    $mt
}


<#
.Synopsis
   Create task to process traces from a folder into MeasuredTrace representation in a database
.DESCRIPTION
   Via scheduled task-- compares a given folder of traces against a database of parsed traces. For traces which are not yet
   reflected in the db it attempts to Measure them and export Measurements to the database
.EXAMPLE
   Register-MtDbMeasurementTask -Path d:\MyEtlTraces -ConnectionString 'server=localhost;database=MyTraces;integrated security=SSPI'
.EXAMPLE
   Register-MtDbMeasurementTask -Path d:\MyEtlTraces -ConnectionString 'server=localhost;database=MyTraces;integrated security=SSPI' -MaxCount 100 -Parallel
.EXAMPLE
    ## Continuous ingestion as traces arrive
    while( $true ){
        Measure-MtDbNewTracesFromFolder -Path d:\MyEtlTraces -ConnectionString 'server=localhost;database=MyTraces;integrated security=SSPI' -MaxCount 100 -Parallel
        Start-Sleep -Seconds 60
    }
#>

function Register-MtDbMeasurementTask{
    param(
        ## Path to a folder containing ETL traces
        [string[]]$Path,
        ## For connection to database. See Example section
        [string]$ConnectionString = 'server=(localdb)\MsSqlLocalDb;database=MeasureTrace;integrated security=true',
        ## Max number of new traces to process on each pass
        [int]$MaxCount = 50,
        ## Enables parallel processing via RSJobs
        [switch]$Parallel,
        [switch]$StartNow,
        [string[]]$MoveFromPath = @()

    )
    $task0ActionArgs = @(
        '-ExecutionPolicy'
        'Bypass'
        '-Command'
        '"&{Move-IdleFilesToFlatDestination -Path TEMPDATAPATHSTOKEN -Destination TEMPDATADESTINATIONPATHTOKEN }"'
        )
    $taskActionArgs = @(
        '-ExecutionPolicy'
        'Bypass'
        '-Command'
        '"&{Measure-MtDbNewTracesFromFolder -Path PATHTOKEN -ConnectionString CONNECTIONSTRINGTOKEN }"'
        )
    $taskPath = '\MeasureTrace.Database\'
    $taskName = 'Measure-MtDbNewTracesFromFolder'
    Get-ScheduledTask -TaskPath $taskPath -TaskName $taskName -ErrorAction SilentlyContinue | Unregister-ScheduledTask
    $tempPathsMergedToken = $MoveFromPath -replace '^',"'" -replace '$',"'" -join ","
    $pathsToWatchMergedToken = $Path -replace '^',"'" -replace '$',"'" -join ","
    $task0ActionArgs[3] = $task0ActionArgs[3] -replace 'TEMPDATAPATHSTOKEN',$tempPathsMergedToken -replace 'TEMPDATADESTINATIONPATHTOKEN',("'" + $Path[0] + "'")
    $taskActionArgs[3] = $taskActionArgs[3] -replace 'PATHTOKEN',$pathsToWatchMergedToken -replace 'CONNECTIONSTRINGTOKEN',("'" + $ConnectionString + "'")
    $ta = New-ScheduledTaskAction -Execute '%windir%\system32\WindowsPowerShell\v1.0\powershell.exe' -Argument ($taskActionArgs -join " ")
    $ta0 = New-ScheduledTaskAction -Execute '%windir%\system32\WindowsPowerShell\v1.0\powershell.exe' -Argument ($taskAction0Args -join " ")
    $tt = New-ScheduledTaskTrigger -Daily -At (Get-Date -Hour 0 -Minute 1) -DaysInterval 1
    $ts = New-ScheduledTaskSettingsSet -Compatibility Win8 -MultipleInstances IgnoreNew -Priority 4
    $st = Register-ScheduledTask -TaskName $taskName -TaskPath $taskPath -User 'NT AUTHORITY\NetworkService' -Action $ta0,$ta -Trigger $tt -Settings $ts
    $st.Triggers[0].Repetition.StopAtDurationEnd = $false
    $st.Triggers[0].Repetition.Interval = 'PT2M'
    $st.Triggers[0].Repetition.Duration = 'P1D'
    Set-ScheduledTask -InputObject $st
    if($StartNow){
        Get-ScheduledTask -TaskPath $taskPath -TaskName $taskName -ErrorAction SilentlyContinue | Start-ScheduledTask
    }
}

<#
.Synopsis
   Assuming one of more folders containing uniquely named files, consolidates them into a flat target folder
.DESCRIPTION
   Assuming one of more folders containing uniquely named files, consolidates them into a flat target folder
.EXAMPLE
   Move-IdleFilesToFlatDestination -Path \\server1\share1,\\server2\share2 -Recurse -Destination \\server3\share3
#>

function Move-IdleFilesToFlatDestination{
    [CmdletBinding()]
    param(
        [string[]]$Path,
        $Destination,
        [switch]$Recurse,
        $Filter = "*",
        $WaitSeconds = 10,
        $WaitPassCount = 3,
        $MaxMoveCount = 500
    )
    Get-MovableFilesFromFolder @psboundParameters | Select-Object -First $MaxMoveCount |
      Move-Item -Destination $Destination
}


#region PrivateUtilityFunctions

function Test-FileHandleOpen{
    [CmdletBinding()]
    param(
        [Alias("FullName")]
        [Parameter(ValueFromPipeline=$true,ValueFromPipelineByPropertyName=$true)]
        [string]$Path,
        [System.IO.FileAccess]$FileAccess = [System.IO.FileAccess]::ReadWrite,
        [System.IO.FileMode]$FileMode = [System.IO.FileMode]::Open,
        [System.IO.FileShare]$FileShare = [System.IO.FileShare]::None
    )
 
    if(-not (Test-Path $Path -ErrorAction SilentlyContinue)){
        return $false
    }
    try{
        $stream = New-Object -TypeName System.IO.Filestream -ArgumentList $Path,$FileMode,$FileAccess,$FileShare
        $stream.Lock(0,4)
        $stream.Unlock(0,4)
        $stream.Close()
        return $true
    }
    catch{
        Write-Verbose $_
        $false
    }
}
 
function Get-MovableFilesFromFolder {
    param(
        [string[]]$Path,
        [switch]$Recurse,
        $Filter = "*",
        $WaitSeconds = 20,
        $WaitPassCount = 5
    )
    $fileStates = @{}
    $passCount = 0
    ## First pass, populate data only
    Get-ChildItem -Path $Path -Recurse:$Recurse -Filter $Filter | 
      Where-Object {-not $_.PsIsContainer } |
      Foreach-Object{
        $fileStates[$_.FullName] = [pscustomobject]@{FileInfo = $_ ; ChangeCount = 0 ; AccessFailCount = 0 }
    }
    ## Evaluation passes
    while($passCount -lt $WaitPassCount){
        $passCount++
        Get-ChildItem -Path $Path -Recurse:$Recurse -Filter $Filter | 
          Foreach-Object{
            $referenceNode = $fileStates[$_.FullName]
            if( $referenceNode -ne $null ){
                if( $_.Length -ne $referenceNode.FileInfo.Length){ $referenceNode.ChangeCount++ }
                if( $_.LastWriteTime -ne $referenceNode.FileInfo.LastWriteTime){ $referenceNode.ChangeCount++ }
                if( -not (Test-FileHandleOpen -Path $_.FullName )){ $referenceNode.AccessFailCount++ }
            }
        }
        Start-Sleep -Seconds $WaitSeconds
    }
    $fileStates.Values | Where-Object{ $_.ChangeCount -eq 0 } | 
      Where-Object{ $_.AccessFailCount -eq 0 } |
      Select-Object -ExpandProperty FileInfo
}