public/Get-PodStatus.ps1

<#
.SYNOPSIS
Get the status of the pods for a release

.PARAMETER Selector
K8s selector for finding the pods

.PARAMETER ReplicaCount
Number of pods to wait for that match the selector

.PARAMETER PollIntervalSec
Seconds to wait between polls. Defaults to 5

.PARAMETER TimeoutSec
Timeout in seconds for waiting on the pods. Defaults to 600

.PARAMETER Namespace
K8s namespace to use, defaults to default

.PARAMETER PodType
Pod, PreInstallJob, or Job. If Pod, all containers must be ready; for PreInstallJob or Job, all containers must have terminated with reason Completed. Defaults to Pod

.PARAMETER LogFileFolder
If specified, pod logs will be written to this folder

.EXAMPLE
$hookStatus = Get-PodStatus -Selector "job-name=$PreHookJobName" `
                                            -Namespace $Namespace `
                                            -TimeoutSec 1 `
                                            -PollIntervalSec $PollIntervalSec `
                                            -PodType PreInstallJob

Get the status of a pre-install job pod

.OUTPUTS
Array of PodStatus objects
#>

function Get-PodStatus {
[CmdletBinding()]
param(
    [Parameter(Mandatory)]
    [string]$Selector,
    [ValidateRange(1, 100)]
    [int] $ReplicaCount = 1,
    [ValidateRange(1, 600)]
    [int] $PollIntervalSec = 5,
    [int] $TimeoutSec = 600,
    [string] $Namespace = "default",
    [ValidateSet("Pod", "PreInstallJob", "Job")]
    [string] $PodType = "Pod",
    [string] $LogFileFolder

)
$ErrorActionPreference = 'Stop'
Set-StrictMode -Version Latest

# Bookkeeping carried across polls:
#   $runningCount - number of pods seen in the ok phase with all containers ready/completed
#   $runningPods  - names of pods already counted, so later polls skip them
#   $podStatuses  - pod name -> [PodStatus]; its Values are what this function returns
$runningCount = 0
$runningPods = @{}
$podStatuses = @{}

# Jobs are done when the pod phase is Succeeded; plain pods are ok when Running.
# $prefix is only used to label status/log output.
if ($PodType -eq "PreInstallJob" ) {
    $IsJob = $true
    $okPhase = "Succeeded"
    $prefix = "preHook job pod"
} elseif ($PodType -eq "Job") {
    $IsJob = $true
    $okPhase = "Succeeded"
    $prefix = "job pod"
} else {
    $IsJob = $false
    $okPhase = "Running"
    $prefix = "pod"
}

# are all containers in the pod ready?
# Reads $IsJob from the enclosing function scope.
# Returns $true when every entry in $containerStatuses qualifies:
#   job     -> state.terminated.reason is 'Completed'
#   non-job -> ready is $true
function allContainersReady($containerStatuses) {

    $readyContainers = @()
    Write-Verbose "ContainerStatuses: $($containerStatuses | ConvertTo-Json -Depth 10 -EnumsAsStrings)"
    if ($IsJob) {
        # for a job, all containers need to be complete
        # Get-Member guards the property access because StrictMode throws on a missing 'terminated'
        $readyContainers += $containerStatuses | Where-Object { (Get-Member -InputObject $_.state -Name 'terminated') -and $_.state.terminated.reason -eq 'Completed'}
    } else {
        # for deploys all should be running
        $readyContainers += $containerStatuses | Where-Object ready -eq $true
    }
    $podIsReady = $readyContainers.Count -eq $containerStatuses.Count
    Write-Verbose "Checking containerStatuses for $($IsJob ? 'job' : 'NON job'). PodIsReady = $podIsReady"
    return $podIsReady
}

$start = Get-Date
$timeoutEnd = $start.AddSeconds($TimeoutSec)
# First poll asks for a wide window of logs/events; later polls only ask for what
# happened since the previous poll (see the end of the while loop).
$logSeconds = "600s"
$extraSeconds = 1 # extra seconds to add to logSeconds to avoid missing something
$lastEventTime = (Get-Date).AddMinutes(-5)
$timedOut = $false

Write-Status "Checking status of pods that match selector $Selector for ${TimeoutSec}s"
while ($runningCount -lt $ReplicaCount -and !$timedOut)
{
    # Remember when this poll started so the next poll can request events since then.
    $pollStart = Get-Date
    # Once the deadline has passed this is the final pass: not-ready pods get their
    # events/logs written below instead of being retried.
    $timedOut = $pollStart -gt $timeoutEnd

    # $pods = kubectl get pod --namespace $Namespace --selector "$LabelName=$AppName" --field-selector "status.phase!=Running" -o json | ConvertFrom-Json
    Write-Verbose "kubectl get pod --namespace $Namespace --selector $Selector --sort-by=.metadata.name -o json"
    $pods = kubectl get pod --namespace $Namespace --selector $Selector --sort-by=.metadata.name -o json | ConvertFrom-Json

    if (!$pods) {
        throw "No data from kubectl get pod --namespace $Namespace --selector $Selector"
    }
    $pods = $pods.items
    Write-Verbose "Got $($pods.Count) pods from kubectl get pod --namespace $Namespace --selector $Selector"

    $i = 0
    foreach ($pod in $pods) {

        $i += 1
        if ($runningPods[$pod.metadata.name]) {
            continue # this pod is already completed
        }

        if (!$podStatuses[$pod.metadata.name]) {
            $podStatuses[$pod.metadata.name] = [PodStatus]::new($pod.metadata.name)
        }

        $HasInit = [bool](Get-Member -InputObject $pod.spec -Name initContainers -ErrorAction SilentlyContinue)
        Write-Verbose "Pod $($pod.metadata.name) has init container: $HasInit."

        if (!(Get-Member -InputObject $pod.status -Name containerStatuses)) {
            Write-Status "Pod $($pod.metadata.name) has no status.containerStatuses. May not be schedulable yet." -LogLevel warning
            Write-Verbose "Pod:`n$($pod | ConvertTo-Json -Depth 10)"
            if (!$timedOut) {
                continue
            }
            $podStatuses[$pod.metadata.name].Status = [Status]::Timeout

            # write final events and logs for this pod
            Write-Verbose "Calling Write-PodEvent for pod $($pod.metadata.name) with LogLevel error and FilterStartupWarnings"
            $podStatuses[$pod.metadata.name].LastBadEvents = Write-PodEvent -Prefix $prefix -PodName $pod.metadata.name `
                                                                            -Namespace $Namespace `
                                                                            -PassThru `
                                                                            -LogLevel error `
                                                                            -FilterStartupWarnings
            # no logs since no containers
            break
        }

        $containers = @($pod.status.containerStatuses).Count
        $readyContainers = @($pod.status.containerStatuses | Where-Object ready -eq $true).Count
        Write-Verbose "ReplicaCount: $ReplicaCount RunningCount: $runningCount PodCount: $($pods.Count)"

        Write-Status "Checking $prefix $i/${ReplicaCount} $prefix $($pod.metadata.name) in $($pod.status.phase) phase" -LogLevel normal -Length 0
        Write-Status " $readyContainers/$containers containers ready. $([int](((Get-Date) - $start).TotalSeconds))s elapsed of ${TimeoutSec}s." -LogLevel normal -Length 0

        if ($VerbosePreference -eq 'Continue' ) {
            # debugging aid: dump the last inspected pod to pod.json in the temp folder
            $pod | ConvertTo-Json -Depth 10 | Out-File (Join-Path ([System.IO.Path]::GetTempPath()) "pod.json")
        }

        Write-Verbose "Ok phase is $okPhase. Pod's phase is $($pod.status.phase)"
        if ($pod.status.phase -eq $okPhase) {
            Write-Verbose " $prefix $($pod.metadata.name) status is $($pod.status.phase)"
            if (allContainersReady $pod.status.containerStatuses) {

                $status = $IsJob ? [Status]::Completed : [Status]::Running
                Write-Status "$prefix $($pod.metadata.name) has all containers ready or completed. Status is $status"

                $runningCount += 1
                $runningPods[$pod.metadata.name] = $true

                $podStatuses[$pod.metadata.name].Status = $status
                $podStatuses[$pod.metadata.name].ContainerStatuses = @($pod.status.containerStatuses | ForEach-Object { [ContainerStatus]::new($_.name, $status) })
                if ($HasInit) {
                    $podStatuses[$pod.metadata.name].InitContainerStatuses = @($pod.status.initContainerStatuses | ForEach-Object { [ContainerStatus]::new($_.name, $status) })
                }

                # write final events and logs for this pod
                Write-Verbose "Calling Write-PodEvent for pod $($pod.metadata.name) with LogLevel ok and FilterStartupWarnings"
                $podStatuses[$pod.metadata.name].LastBadEvents = Write-PodEvent -Prefix $prefix -PodName $pod.metadata.name `
                                                                                -Namespace $Namespace `
                                                                                -PassThru `
                                                                                -LogLevel ok `
                                                                                -FilterStartupWarnings

                $podStatuses[$pod.metadata.name].PodLogFile = Write-PodLog -Prefix $prefix -PodName $pod.metadata.name -Namespace $Namespace -LogLevel ok -HasInit:$HasInit -LogFileFolder $LogFileFolder
                continue
            } else {
                Write-Verbose "Pod $($pod.metadata.name) is ready (phase = $okPhase), but pod containerStatuses are: $($pod.status.containerStatuses | out-string)"
            }
        }

        if ($timedOut) {
            # final pass: dump events and logs for the first pod that is still not ready, then stop
            Write-PodEvent -Prefix $prefix -PodName $pod.metadata.name -Namespace $Namespace -LogLevel warning -FilterStartupWarnings
            $podStatuses[$pod.metadata.name].PodLogFile = Write-PodLog -Prefix $prefix -PodName $pod.metadata.name -Namespace $Namespace -LogLevel warning -HasInit:$HasInit -LogFileFolder $LogFileFolder
            break
        }

        # check for any errors since not ready yet
        # $errors is initialised up front so the Failed-phase check below also runs
        # (and stays StrictMode-safe) when the pod has no events at all.
        $errors = @()
        $events = Get-PodEvent -Namespace $Namespace -PodName $pod.metadata.name
        if ($events) {
            # anything that is not Normal counts as an error, except startup-probe
            # failures and FailedScheduling, which are ignored while the pod is still starting
            $errors = @($events | Where-Object { $_.type -ne "Normal" -and $_.message -notlike "Startup probe failed:*" -and $_.reason -ne "FailedScheduling"})
            Write-Verbose "Got $($errors.count) error of $($events.count) events for pod $($pod.metadata.name) "
        }

        if ($errors -or $pod.status.phase -eq "Failed" ) {
            Write-Status "Pod $($pod.metadata.name) has $($errors.count) errors" -LogLevel Error
            # write final events and logs for this pod
            Write-Verbose "Calling Write-PodEvent for pod $($pod.metadata.name) with LogLevel Error"
            $podStatuses[$pod.metadata.name].LastBadEvents = Write-PodEvent -Prefix $prefix -PodName $pod.metadata.name -Namespace $Namespace -LogLevel Error -PassThru
            $podStatuses[$pod.metadata.name].PodLogFile = Write-PodLog -Prefix $prefix -PodName $pod.metadata.name -Namespace $Namespace -LogLevel Error -HasInit:$HasInit -LogFileFolder $LogFileFolder

            # get latest pod status since sometimes get containerCreating status here
            $name = $pod.metadata.name
            Write-Verbose "kubectl get pod --namespace $Namespace $name -o json"
            $podJson = kubectl get pod --namespace $Namespace $name -o json
            $pod = $podJson | ConvertFrom-Json
            if (!$pod -or !(Get-Member -InputObject $pod -Name metadata)) {
                Write-Warning "Unexpected response from kubectl get pod --namespace $Namespace $name JSON is: '$podJson'"
                throw "Unexpected response from kubectl get pod --namespace $Namespace $name"
            }

            $podStatuses[$pod.metadata.name].ContainerStatuses = @($pod.status.containerStatuses | ForEach-Object {
                    Write-Verbose "Pod status: $($_ | ConvertTo-Json -Depth 10)"
                    [ContainerStatus]::new($_.name, $_) })
            if ($HasInit) {
                $podStatuses[$pod.metadata.name].InitContainerStatuses = @($pod.status.initContainerStatuses | ForEach-Object { [ContainerStatus]::new($_.name, $_) })
            }
            $podStatuses[$pod.metadata.name].DetermineStatus()
            Write-Verbose "Get-PodStatus returning $($podStatuses[$pod.metadata.name] | ConvertTo-Json -Depth 10 -EnumsAsStrings)"
            # fail fast: the first pod with errors ends the wait for all pods
            return $podStatuses.Values

        } elseif ($events -and $VerbosePreference -eq 'Continue') {
            Write-Verbose "No errors found in events for pod $($pod.metadata.name) yet"

            # $lastEventTime is the start of the previous poll (5 minutes back on the first poll)
            Write-PodEvent -Prefix $prefix -PodName $pod.metadata.name -Since $lastEventTime -Namespace $Namespace
            $podStatuses[$pod.metadata.name].PodLogFile = Write-PodLog -Prefix $prefix -PodName $pod.metadata.name -Since $logSeconds -Namespace $Namespace -HasInit:$HasInit
        }
        # TODO we've seen case where pod.status.containerStatuses.state.waiting has
        #   message: secret "eventhub-disabled-bootstrap-servers" not found
        #   reason: CreateContainerConfigError (or reason: ImagePullBackOff)
        # but nothing in events. Local testing always has events.
    } # end foreach pod

    if ($runningCount -ge $ReplicaCount) {
        Write-Status "All ${prefix}s ($runningCount/$ReplicaCount) that matched selector $Selector are running`n" -Length 0 -LogLevel normal
        break
    }

    if ($timedOut) {
        # the deadline has passed and the final pass is done; sleeping again would only delay the result
        break
    }

    Write-Verbose "Sleeping $PollIntervalSec second$($PollIntervalSec -eq 1 ? '': 's'). Running Count = $runningCount ReplicaCount = $ReplicaCount"
    Start-Sleep -Seconds $PollIntervalSec
    # from now on only look back as far as the previous poll
    $logSeconds = "$($PollIntervalSec + $extraSeconds)s"
    $lastEventTime = $pollStart
} # end while check pods

$ok = [bool]($runningCount -ge $ReplicaCount)
if (!$ok) {
    Write-Verbose "Times: $(Get-Date) -lt $($timeoutEnd) Values count: $($podStatuses.Values.Count)"
    Write-Status "Error getting status for pods that matched selector $Selector after $([int](((Get-Date) - $start).TotalSeconds))s" `
                -Length 0 `
                -LogLevel Error
    Write-Status " RunningCount: $runningCount ReplicaCount: $ReplicaCount Ok: $ok TimedOut $timedOut" `
                -Length 0 `
                -LogLevel Error
    if ($podStatuses.Count -eq 0) {
        # no pod ever matched the selector: return a placeholder so callers still get a Timeout entry
        $status = [PodStatus]::new("<no pods found>")
        $status.Status = [Status]::Timeout
        $podStatuses["<no pods found>"] = $status
    }
    # every tracked pod is reported as Timeout here, including ones already counted as running
    $podStatuses.Values | ForEach-Object { $_.Status = [Status]::Timeout }

}

return $podStatuses.Values
}