# DownloadErrorScript.ps1

<#PSScriptInfo
 
.VERSION 2.1
 
.GUID a026e502-a561-4270-85ec-ae66f02897f1
 
.AUTHOR Microsoft Corporation
 
.COMPANYNAME Microsoft Corporation
 
.COPYRIGHT (c) Microsoft Corporation. All rights reserved.
 
.TAGS Microsoft Graph Search PowerShell
 
.LICENSEURI https://learn.microsoft.com/en-us/legal/mdsa?redirectedfrom=MSDN
 
.PROJECTURI https://learn.microsoft.com/en-us/microsoftsearch/connector-details-errors
 
.ICONURI https://contoso.com/Icon
 
.EXTERNALMODULEDEPENDENCIES
 
.REQUIREDSCRIPTS
 
.EXTERNALSCRIPTDEPENDENCIES
 
.RELEASENOTES
 
#>


#Requires -Module MSAL.PS

<#
.Synopsis
Script to download item errors for a connection
 
.Description
This script downloads the item errors in a Microsoft Graph Connectors Connection. An Msal token is generated using the tenant credentials in which the connection is present.
That token is then used to first get all the distinct error codes with which the items failed and then for each error code all the item errors are fetched iteratively.
Getting item errors for a particular error code is a batched API call with default batch size as 250.
 
The token is generated using the MSAL.PS module. Installing this script should install the MSAL.PS module automatically.
If that does not happen, use the command below to install MSAL.PS:
 
Install-Module -Name MSAL.PS
 
.Parameter ConnectionId
ConnectionId for which the item errors need to be downloaded.
 
.Parameter OutputFile
Path of the output file, without an extension, to which the item errors are written. The script appends .csv for the item errors and .log for the transcript.
 
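.Parameter BatchLimit
Optional parameter (1 to 5000) that sets the batch size used when getting item errors for a particular error code. If it is omitted, the script prompts for a value and uses 250 when Enter is pressed. Increasing the batch size may increase the probability of failures.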
#>


# Taking Parameters
param(
    [Parameter(Mandatory=$true)][string]$ConnectionId,
    [Parameter(Mandatory=$true)][string]$OutputFile,
    [Parameter()][ValidateRange(1, 5000)][int]$BatchLimit
)

# Check if the BatchLimit is not provided or is 0
if (-not ($BatchLimit) -or ($BatchLimit -eq 0))
{
    $validInput = $false

    # Prompt the user for input until a valid integer greater than 0 is provided
    while (-not $validInput)
    {
        $tempBatchLimit = Read-Host "BatchLimit (press Enter to use the default value '250')"

        # If the user just presses Enter, use the default value
        if (-not ($tempBatchLimit))
        {
            $BatchLimit = 250
            $validInput = $true
        }
        else
        {
            # Try to parse the user input as an integer
            $parsedTempBatchLimit = 0
            if ([System.Int32]::TryParse($tempBatchLimit, [ref]$parsedTempBatchLimit) -and $parsedTempBatchLimit -gt 0)
            {
                $BatchLimit = $parsedTempBatchLimit
                $validInput = $true
            }
            else
            {
                Write-Error "Invalid input. Please enter a valid integer greater than 0."
            }
        }
    }
}

# Start logging to a file
Start-Transcript -Path "$OutputFile.log" 

# Log the Parameters used
Write-Host "ConnectionId : $ConnectionId `nOutput File Name: $OutputFile `nBatch Limit: $BatchLimit"

# Get tenant environment using TenantId
function GetEnvironmentFromTenant {
    param (
        $tenantId
    )

    New-Variable -Name GccSubRegion -Value "gcc" -Option Constant
    New-Variable -Name UsgRegion -Value "usg" -Option Constant
    New-Variable -Name UsGovRegion -Value "usgov" -Option Constant
    New-Variable -Name DodSubRegion -Value "dod" -Option Constant
    New-Variable -Name DodConSubRegion -Value "dodcon" -Option Constant
    New-Variable -Name GCCTenantRegionScope -Value "na" -Option Constant

    $headers = @{"Accept" = "application/json"}
    $tenantCloudInfo = Invoke-RestMethodWithRetries -ApiName "GetTenantCloudInfo" -Uri "https://login.microsoftonline.com/$tenantId/.well-known/openid-configuration" -Headers $headers -Method Get | ConvertFrom-Json
    
    $regionScope = ""
    $regionSubScope = ""

    if( -not ([string]::IsNullOrEmpty($tenantCloudInfo.tenant_region_scope)) )
    {
        $regionScope = ([string]$tenantCloudInfo.tenant_region_scope).ToLower()
    }

    if( -not ([string]::IsNullOrEmpty($tenantCloudInfo.tenant_region_sub_scope)) )
    {
        $regionSubScope = ([string]$tenantCloudInfo.tenant_region_sub_scope).ToLower()
    }

    if(($regionScope -eq $GCCTenantRegionScope) -and ($regionSubScope -eq $GccSubRegion))
    {
        return "GCC"
    }
    elseif (($regionScope -eq $UsgRegion) -or ($regionScope -eq $UsGovRegion))
    {
        if($regionSubScope -eq $DodConSubRegion)
        {
            return "GCCH"
        }
        elseif($regionSubScope -eq $DodSubRegion)
        {
            return "DoD"
        }
    }

    return "PROD"
}

# Select BaseUrl on the basis of Environment
function SelectBaseUrl {
    param (
        $selectedEnvironment
    )
    
    if ($selectedEnvironment -eq "GCC") 
    {
        return "https://gcsgcc.office.com"
    } elseif ($selectedEnvironment -eq "GCCH") 
    {
        return "https://gcs.office365.us"
    } elseif ($selectedEnvironment -eq "PROD") 
    {
        return "https://gcs.office.com"
    }

    throw "Tenant Environment is not supported"
}

function InitializeOutputFile {
    param (
        $fileName
    )
    "Time,Path,Error Code,Detailed Error Code,Message" | Set-Content -Path $fileName
}

function WriteErrorBatchToOutputFile {
    param (
        $errorsBatch,
        $fileName
    )

    Foreach ($error in $errorsBatch.errors) 
    {
        $outputLine = Get-Date($error.timeStamp) -UFormat "%m/%d/%Y %T"
        $outputLine = $outputLine +","+ $error.displayText +","+ $errorsBatch.Code +","+ $error.detailedCode +',"'+ $errorsBatch.message +'"'
        $outputLine | Add-Content -Path $fileName
    }
}

function WriteExceptionToOutputFile {
    param (
        $errorMessage,
        $fileName
    )

    $errorMessage | Add-Content -Path $fileName
}

function Invoke-RestMethodWithRetries {
    param (
        [string]$apiName,
        [string]$uri,
        [hashtable]$headers,
        [string]$method,
        [int]$maxRetries = 3
    )

    $retryCount = 0
    $errorMessage = ""
    do
    {
        try 
        {
            Write-Host "Request made for $apiName."
            return Invoke-WebRequest -Uri $uri -Headers $headers -Method $method 
        } 
        catch
        {
            $errorMessage = $_
            $retryCount++
            $exponentialBackoff = [int]([Math]::Pow(2,$retryCount))
            Write-Warning "Request failed for $apiName. Retrying (Attempt $retryCount)..."
            Start-Sleep -Seconds $exponentialBackoff  # Increase the wait time with each retry
        }
    }
    while ($retryCount -lt $maxRetries)

    throw "Request failed for $apiName. All the retries are exhausted. `n$errorMessage"
}

function Get-TokenWithRetries {
    param (
        [string]$clientId,
        [string]$authority,
        [string]$redirectUri,
        [string]$scopes,
        [bool]$isRefresh = $false,
        [int]$maxRetries = 3
    )

    $retryCount = 0
    $errorMessage = ""
    do 
    {
        try 
        {
            if($isRefresh)
            {
                Write-Host "Requesting for Msal Refresh Access token."
                return Get-MsalToken -ClientId $clientId -Authority $authority -RedirectUri $redirectUri -Scopes $scopes -ForceRefresh
            }
            else
            {
                Write-Host "Requesting for Msal Access token."
                return Get-MsalToken -ClientId $clientId -Authority $authority -RedirectUri $redirectUri -Scopes $scopes
            }
        } 
        catch 
        {
            $errorMessage = $_
            $retryCount++
            $exponentialBackoff = [int]([Math]::Pow(2,$retryCount))
            Write-Warning "Request failed for Generating Msal Token. Retrying (Attempt $retryCount)..."
            Start-Sleep -Seconds $exponentialBackoff  # Increase the wait time with each retry
        }
    } 
    while ($retryCount -lt $maxRetries)
    
    throw "Request failed for Generating Msal Token. All the retries are exhausted. `n$errorMessage"
} 

# Initializing Global Variables
$OutputFile = "$OutputFile.csv"

try 
{
    # Get the access token
    $clientId = "32287083-2fe0-4a7c-86ad-d84963371567"
    $authority = "https://login.microsoftonline.com/common"
    $redirectUri = "https://login.microsoftonline.com/common/oauth2/nativeclient"
    $scopes = 'https://gcs.office.com/.default'

    $token = Get-TokenWithRetries -ClientId $clientId -Authority $authority -RedirectUri $redirectUri -Scopes $scopes
    
    #Initiliazing Output file
    InitializeOutputFile -FileName $OutputFile

    # Get the current language name
    $culture = Get-Culture
    $languageCode = $culture.Name
    
    # Get the Environment
    $environment = GetEnvironmentFromTenant -TenantId $token.TenantId

    # Initializing variables for the API call
    $baseUrl = SelectBaseUrl -SelectedEnvironment $environment
    $headers = @{"authorization"="Bearer $($token.AccessToken)"; "Content-Type"="application/json"; "Accept" = "application/json"; "Accept-Language" = "$languageCode"}

    # Get the list of Error Codes
    $errorCodesResponse = Invoke-RestMethodWithRetries -ApiName "GetAggregatedErrors" -Uri "$baseUrl/v1.0/admin/datasets/$ConnectionId/errors" -Headers $headers -Method Get | ConvertFrom-Json
    Write-Host "Total Error Count is: $($errorCodesResponse.errorCount)"

    #Initializing variables
    $errorsReceivedSoFar = "0"
    $errorsMissedSoFar = "0"
    $errorsRemaining = $errorCodesResponse.errorCount

    # Get all the error items for each error code
    Foreach($errorCode in $errorCodesResponse.errorCodes)
    {
        $currentErrorCode = $errorCode.code
        $currentOffset = "0"
        $currentErrorCodeCount = $errorCode.count
        $errorsReceivedForCurrentErrorCode = "0"
        $errorsMissedForCurrentErrorCode = "0"

        Write-Host "Current Error Code: $currentErrorCode, Total Count: $currentErrorCodeCount"

        while($currentOffset -ne "-1") 
        {
            Write-Host "Current Offset: $currentOffset"

            # Refresh the token if it has expired
            $currentTime = Get-Date
            $tokenExpiry = $token.ExpiresOn.LocalDateTime
            Write-Host "CurrentTime: $currentTime, TokenExpiresOn: $tokenExpiry"

            # Refresh the token if it's about to expire in the next 5 seconds
            if ($tokenExpiry -lt ($currentTime.AddSeconds(5))) 
            {
                Write-Host "Access token is about to expire. Refreshing..."
                $token = Get-TokenWithRetries -ClientId $clientId -Authority $authority -RedirectUri $redirectUri -Scopes $scopes -IsRefresh $true
                $headers = @{"authorization"="Bearer $($token.AccessToken)"; "Content-Type"="application/json"; "Accept" = "application/json"; "Accept-Language" = "$languageCode"}
            }
            
            try 
            {
                $itemErrorsBatchResponse = Invoke-RestMethodWithRetries -ApiName "GetErrorsByErrorCode" -Uri "$baseUrl/v1.0/admin/datasets/$ConnectionId/errors/${currentErrorCode}?offset=$currentOffset&limit=$BatchLimit" -Headers $headers -Method Get | ConvertFrom-Json
                WriteErrorBatchToOutputFile -ErrorsBatch $itemErrorsBatchResponse -FileName $OutputFile
                $currentOffset = $itemErrorsBatchResponse.metaData.nextOffset
                $errorsReceivedForCurrentErrorCode = [string]([int]$errorsReceivedForCurrentErrorCode + [int]$itemErrorsBatchResponse.metadata.count)
            }
            catch 
            {
                Write-Warning "Error while getting the batch for offset: $currentOffset. `nSkipping this batch."
                WriteExceptionToOutputFile -ErrorMessage $_ -FileName $OutputFile
                
                if([int]$currentOffset + [int]$BatchLimit -lt $currentErrorCodeCount)
                {
                    $currentOffset = [string]([int]$currentOffset + [int]$BatchLimit)
                    $errorsMissedForCurrentErrorCode = [string]([int]$errorsMissedForCurrentErrorCode + [int]$BatchLimit)
                }
                else
                {
                    $currentOffset = "-1"
                    $errorsMissedForCurrentErrorCode = [string]([int]$currentErrorCodeCount - [int]$errorsReceivedForCurrentErrorCode)
                }
            }

            Write-Host "Current Error Code: $currentErrorCode, Errors Received For current error code: $errorsReceivedForCurrentErrorCode, Errors Missed For Current Error Code: $errorsMissedForCurrentErrorCode"
        }

        $errorsReceivedSoFar = [string]([int]$errorsReceivedSoFar + [int]$errorsReceivedForCurrentErrorCode)
        $errorsMissedSoFar = [string]([int]$errorsMissedSoFar + [int]$errorsMissedForCurrentErrorCode)
        $errorsRemaining = [string]([int]$errorCodesResponse.errorCount - ([int]$errorsReceivedSoFar + [int]$errorsMissedSoFar))
        Write-Host "Total Error Count: $($errorCodesResponse.errorCount), Errors remaining: $errorsRemaining, Errors Missed: $errorsMissedSoFar, Errors Received: $errorsReceivedSoFar"
    }
} 
catch 
{
    Write-Error "Error in running GetErrorItems.ps1 Error message: `n$_`n";
    WriteExceptionToOutputFile -ErrorMessage $_ -FileName $OutputFile
}

# Open the output file
Invoke-Item $OutputFile

# Stop Logging
Stop-Transcript