diff --git a/.azure/pipelines/jobs/default-build.yml b/.azure/pipelines/jobs/default-build.yml
index 45d7bc6184..5a829c68cf 100644
--- a/.azure/pipelines/jobs/default-build.yml
+++ b/.azure/pipelines/jobs/default-build.yml
@@ -126,6 +126,11 @@ jobs:
     steps:
     - checkout: self
       clean: true
+    - ${{ if and(eq(parameters.agentOs, 'Windows'), eq(parameters.isTestingJob, true)) }}:
+      - powershell: ./eng/scripts/InstallProcDump.ps1
+        displayName: Install ProcDump
+      - powershell: ./eng/scripts/StartDumpCollectionForHangingBuilds.ps1 $(ProcDumpPath)procdump.exe artifacts/log/ (Get-Date).AddMinutes(160) dotnet
+        displayName: Start background dump collection
     - ${{ if eq(parameters.installNodeJs, 'true') }}:
       - task: NodeTool@0
         displayName: Install Node 10.x
@@ -165,6 +170,12 @@ jobs:
 
     - ${{ parameters.afterBuild }}
 
+    - ${{ if and(eq(parameters.agentOs, 'Windows'), eq(parameters.isTestingJob, true)) }}:
+      - powershell: ./eng/scripts/FinishDumpCollectionForHangingBuilds.ps1 artifacts/log/
+        displayName: Finish background dump collection
+        continueOnError: true
+        condition: always()
+
     - ${{ if eq(parameters.agentOs, 'Windows') }}:
       - powershell: eng\scripts\KillProcesses.ps1
         displayName: Kill processes
diff --git a/eng/scripts/FinishDumpCollectionForHangingBuilds.ps1 b/eng/scripts/FinishDumpCollectionForHangingBuilds.ps1
new file mode 100644
index 0000000000..b6123e49a4
--- /dev/null
+++ b/eng/scripts/FinishDumpCollectionForHangingBuilds.ps1
@@ -0,0 +1,102 @@
+<#
+.SYNOPSIS
+    Reports on and cleans up the dump collection job for hanging builds.
+.DESCRIPTION
+    Reads the job name from the sentinel file in the dump folder, lists captured dumps,
+    displays the output of the dump collection job if it ran, and removes the job and
+    its scheduled job registration.
+.PARAMETER ProcDumpOutputPath
+    Folder, relative to the repository root, containing the sentinel file and the dumps.
+#>
+param(
+    [Parameter(Mandatory = $true)]
+    [ValidateNotNullOrEmpty()]
+    [string]
+    $ProcDumpOutputPath
+)
+
+Write-Output "Finishing dump collection for hanging builds.";
+
+$repoRoot = Resolve-Path "$PSScriptRoot\..\..";
+$ProcDumpOutputPath = Join-Path $repoRoot $ProcDumpOutputPath;
+
+$sentinelFile = Join-Path $ProcDumpOutputPath "dump-sentinel.txt";
+if ((-not (Test-Path $sentinelFile))) {
+    # The parentheses make PowerShell evaluate the concatenation as an expression. Without
+    # them, '+' and the second string are passed to Write-Output as separate arguments.
+    Write-Output ("No sentinel file available in '$sentinelFile'. " +
+        "StartDumpCollectionForHangingBuilds.ps1 has not been executed, is not correctly configured or failed before creating the sentinel file.");
+    return;
+}
+
+Get-Process "procdump" -ErrorAction SilentlyContinue | ForEach-Object { Write-Output "ProcDump with PID $($_.Id) is still running."; };
+
+$capturedDumps = Get-ChildItem $ProcDumpOutputPath -Filter *.dmp;
+$capturedDumps | ForEach-Object { Write-Output "Found captured dump $_"; };
+
+$JobName = (Get-Content $sentinelFile);
+
+if ($JobName.Count -ne 1) {
+    if ($JobName.Count -eq 0) {
+        Write-Warning "No job name found. This is likely an error.";
+        return;
+    }
+    else {
+        Write-Output "Multiple job names found '$JobName'.";
+        return;
+    }
+}
+
+# Query the scheduled job definition first. Running a scheduled job cmdlet imports the
+# PSScheduledJob module, which Get-Job needs in order to find scheduled job instances.
+$registeredJob = Get-ScheduledJob -Name $JobName -ErrorAction SilentlyContinue;
+$dumpCollectionJob = Get-Job -Name $JobName -ErrorAction SilentlyContinue;
+
+if ($null -eq $dumpCollectionJob) {
+    Write-Output "No job found for '$JobName'. It either didn't run or there is an issue with the job definition.";
+
+    if ($null -eq $registeredJob) {
+        Write-Warning "Couldn't find a scheduled job '$JobName'.";
+    }
+    else {
+        # No job instance exists, so unregister the definition to keep it from firing after this build.
+        try {
+            Write-Output "Unregistering scheduled job";
+            Unregister-ScheduledJob $registeredJob -ErrorAction Stop;
+        }
+        catch {
+            Write-Output "Failed to unregister $JobName";
+        }
+    }
+    return;
+}
+
+Write-Output "Listing existing jobs";
+Get-Job -Name CaptureDumps*
+
+Write-Output "Listing existing scheduled jobs";
+Get-ScheduledJob -Name CaptureDumps*
+
+Write-Output "Displaying job output";
+Receive-Job $dumpCollectionJob;
+
+# Wait only for the dump collection job, not for every job in the session.
+Write-Output "Waiting for current job to finish";
+$dumpCollectionJob | Wait-Job;
+
+# -ErrorAction Stop turns non-terminating cmdlet errors into exceptions, so the catch blocks run.
+try {
+    Write-Output "Removing collection job";
+    Remove-Job $dumpCollectionJob -ErrorAction Stop;
+}
+catch {
+    Write-Output "Failed to remove collection job";
+}
+
+try {
+    Write-Output "Unregistering scheduled job";
+    Unregister-ScheduledJob $registeredJob -ErrorAction Stop;
+}
+catch {
+    Write-Output "Failed to unregister $JobName";
+}
diff --git a/eng/scripts/InstallProcDump.ps1 b/eng/scripts/InstallProcDump.ps1
new file mode 100644
index 0000000000..92334ffb7b
--- /dev/null
+++ b/eng/scripts/InstallProcDump.ps1
@@ -0,0 +1,53 @@
+<#
+.SYNOPSIS
+    Installs ProcDump into a folder in this repo.
+.DESCRIPTION
+    This script downloads and extracts ProcDump. When the TF_BUILD environment variable
+    is set, it also emits logging commands that set the ProcDumpPath variable and
+    prepend the install folder to PATH.
+.PARAMETER Force
+    Overwrite the existing installation
+#>
+param(
+    [switch]$Force
+)
+$ErrorActionPreference = 'Stop'
+$ProgressPreference = 'SilentlyContinue' # Workaround PowerShell/PowerShell#2138
+
+Set-StrictMode -Version 1
+
+$repoRoot = Resolve-Path "$PSScriptRoot\..\.."
+$installDir = "$repoRoot\.tools\ProcDump\"
+$tempDir = "$repoRoot\obj"
+
+$alreadyInstalled = $false
+if (Test-Path $installDir) {
+    if ($Force) {
+        Remove-Item -Force -Recurse $installDir
+    }
+    else {
+        Write-Host "ProcDump already installed to $installDir. Exiting without action. Call this script again with -Force to overwrite."
+        $alreadyInstalled = $true
+    }
+}
+
+if (-not $alreadyInstalled) {
+    Remove-Item -Force -Recurse $tempDir -ErrorAction Ignore | Out-Null
+    mkdir $tempDir -ErrorAction Ignore | Out-Null
+    mkdir $installDir -ErrorAction Ignore | Out-Null
+    Write-Host "Starting ProcDump download"
+    Invoke-WebRequest -UseBasicParsing -Uri "https://download.sysinternals.com/files/Procdump.zip" -OutFile "$tempDir/ProcDump.zip"
+    Write-Host "Done downloading ProcDump"
+    Expand-Archive "$tempDir/ProcDump.zip" -DestinationPath "$tempDir/ProcDump/"
+    Write-Host "Expanded ProcDump to $tempDir"
+    Write-Host "Installing ProcDump to $installDir"
+    Move-Item "$tempDir/ProcDump/*" $installDir
+    Write-Host "Done installing ProcDump to $installDir"
+}
+
+# Publish the location even when the download was skipped; the pipeline step that starts
+# dump collection references $(ProcDumpPath).
+if ($env:TF_BUILD) {
+    Write-Host "##vso[task.setvariable variable=ProcDumpPath]$installDir"
+    Write-Host "##vso[task.prependpath]$installDir"
+}
diff --git a/eng/scripts/StartDumpCollectionForHangingBuilds.ps1 b/eng/scripts/StartDumpCollectionForHangingBuilds.ps1
new file mode 100644
index 0000000000..4ed696ec3c
--- /dev/null
+++ b/eng/scripts/StartDumpCollectionForHangingBuilds.ps1
@@ -0,0 +1,140 @@
+<#
+.SYNOPSIS
+    Registers a scheduled job that captures dumps of candidate processes at a given time.
+.PARAMETER ProcDumpPath
+    Path to the ProcDump executable.
+.PARAMETER ProcDumpOutputPath
+    Folder, relative to the repository root, where the dumps and the sentinel file are written.
+.PARAMETER WakeTime
+    Time at which the scheduled job is triggered.
+.PARAMETER CandidateProcessNames
+    Names of the processes to capture dumps for.
+#>
+param(
+    [Parameter(Mandatory = $true)]
+    [ValidateNotNullOrEmpty()]
+    [string]
+    $ProcDumpPath,
+    [Parameter(Mandatory = $true)]
+    [ValidateNotNullOrEmpty()]
+    [string]
+    $ProcDumpOutputPath,
+    [Parameter(Mandatory = $true)]
+    [datetime]
+    $WakeTime,
+    [Parameter(Mandatory = $true)]
+    [ValidateNotNullOrEmpty()]
+    [string []]
+    $CandidateProcessNames
+)
+
+Write-Output "Setting up a scheduled job to capture process dumps.";
+
+if ((-not (Test-Path $ProcDumpPath))) {
+    Write-Warning "Can't find ProcDump at '$ProcDumpPath'.";
+}
+else {
+    Write-Output "Using ProcDump from '$ProcDumpPath'.";
+}
+
+try {
+    # Query scheduled jobs first. Running a scheduled job cmdlet imports the PSScheduledJob
+    # module, which Get-Job needs in order to find scheduled job instances.
+    $previousScheduledJobs = Get-ScheduledJob CaptureDumps* -ErrorAction SilentlyContinue;
+    $previousJobs = Get-Job -Name CaptureDumps* -ErrorAction SilentlyContinue;
+
+    if ($previousJobs.Count -ne 0) {
+        Write-Output "Found existing dump jobs.";
+    }
+
+    if ($previousScheduledJobs.Count -ne 0) {
+        Write-Output "Found existing scheduled dump jobs.";
+    }
+
+    $previousJobs | Stop-Job -PassThru | Remove-Job;
+    $previousScheduledJobs | Unregister-ScheduledJob;
+}
+catch {
+    Write-Output "There was an error cleaning up previous jobs.";
+    Write-Output $_.Exception.Message;
+}
+
+$repoRoot = Resolve-Path "$PSScriptRoot\..\..";
+$ProcDumpOutputPath = Join-Path $repoRoot $ProcDumpOutputPath;
+
+Write-Output "Dumps will be placed at '$ProcDumpOutputPath'.";
+Write-Output "Watching processes $($CandidateProcessNames -join ', ')";
+
+# This script registers a scheduled job. The scheduled job is triggered at $WakeTime.
+# When the scheduled job executes, it runs procdump on all alive processes whose name matches $CandidateProcessNames.
+# The dumps are placed in $ProcDumpOutputPath.
+# If the build completes successfully before $WakeTime, a final step unregisters the job.
+
+# Create a unique identifier for the job name
+$JobName = "CaptureDumps" + (New-Guid).ToString("N");
+
+# Ensure that the dumps output path exists.
+if ((-not (Test-Path $ProcDumpOutputPath))) {
+    New-Item -ItemType Directory $ProcDumpOutputPath | Out-Null;
+}
+
+# We write a sentinel file that we use at the end of the build to
+# find the job we started and to determine the results from the scheduled
+# job (whether it ran or not, and to display the output from the job).
+$sentinelFile = Join-Path $ProcDumpOutputPath "dump-sentinel.txt";
+Out-File -FilePath $sentinelFile -InputObject $JobName | Out-Null;
+
+[scriptblock] $ScriptCode = {
+    param(
+        $ProcDumpPath,
+        $ProcDumpOutputPath,
+        $CandidateProcessNames)
+
+    Write-Output "Waking up to capture process dumps. Determining hanging processes.";
+
+    [System.Diagnostics.Process []]$AliveProcesses = @();
+    foreach ($candidate in $CandidateProcessNames) {
+        try {
+            # Get-Process reports a missing process as a non-terminating error. -ErrorAction Stop
+            # turns it into an exception so the catch block runs and nothing is appended.
+            $candidateProcesses = Get-Process $candidate -ErrorAction Stop;
+            $candidateProcesses | ForEach-Object { Write-Output "Found candidate process $candidate with PID '$($_.Id)'." };
+            $AliveProcesses += $candidateProcesses;
+        }
+        catch {
+            Write-Output "No process found for $candidate";
+        }
+    }
+
+    Write-Output "Starting process dump capture.";
+
+    $dumpFullPath = [System.IO.Path]::Combine($ProcDumpOutputPath, "hung_PROCESSNAME_PID_YYMMDD_HHMMSS.dmp");
+
+    Write-Output "Capturing output for $($AliveProcesses.Length) processes.";
+
+    foreach ($process in $AliveProcesses) {
+
+        $procDumpArgs = @("-accepteula", "-ma", $process.Id, $dumpFullPath);
+        try {
+            Write-Output "Capturing dump for '$($process.Name)' with PID '$($process.Id)'.";
+            Start-Process -FilePath $ProcDumpPath -ArgumentList $procDumpArgs -NoNewWindow -Wait;
+        }
+        catch {
+            Write-Output "There was an error capturing a process dump for '$($process.Name)' with PID '$($process.Id)'."
+            Write-Warning $_.Exception.Message;
+        }
+    }
+
+    Write-Output "Done capturing process dumps.";
+}
+
+$ScriptTrigger = New-JobTrigger -Once -At $WakeTime;
+
+try {
+    # -ErrorAction Stop makes a failed registration reach the catch block.
+    Register-ScheduledJob -Name $JobName -ScriptBlock $ScriptCode -Trigger $ScriptTrigger -ArgumentList $ProcDumpPath, $ProcDumpOutputPath, $CandidateProcessNames -ErrorAction Stop;
+}
+catch {
+    Write-Warning "Failed to register scheduled job '$JobName'. Dumps will not be captured for build hangs.";
+    Write-Warning $_.Exception.Message;
+}