Capture dump of hanging test process in Helix (#21659)

This commit is contained in:
Brennan 2020-05-12 13:55:09 -07:00 committed by GitHub
parent 5a0c097ad4
commit f56c61b812
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 143 additions and 52 deletions

View File

@ -5,6 +5,7 @@
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Runtime.InteropServices;
using System.Text;
using System.Threading;
@ -19,10 +20,53 @@ namespace RunTests
[DllImport("libc", SetLastError = true, EntryPoint = "kill")]
private static extern int sys_kill(int pid, int sig);
public static Task CaptureDumpAsync()
{
var dumpDirectoryPath = Environment.GetEnvironmentVariable("HELIX_DUMP_FOLDER");
if (dumpDirectoryPath == null)
{
return Task.CompletedTask;
}
var process = Process.GetCurrentProcess();
var dumpFilePath = Path.Combine(dumpDirectoryPath, $"{process.ProcessName}-{process.Id}.dmp");
return CaptureDumpAsync(process.Id, dumpFilePath);
}
public static Task CaptureDumpAsync(int pid)
{
var dumpDirectoryPath = Environment.GetEnvironmentVariable("HELIX_DUMP_FOLDER");
if (dumpDirectoryPath == null)
{
return Task.CompletedTask;
}
var process = Process.GetProcessById(pid);
var dumpFilePath = Path.Combine(dumpDirectoryPath, $"{process.ProcessName}.{process.Id}.dmp");
return CaptureDumpAsync(process.Id, dumpFilePath);
}
public static Task CaptureDumpAsync(int pid, string dumpFilePath)
{
// Skip this on OSX, we know it's unsupported right now
if (RuntimeInformation.IsOSPlatform(OSPlatform.OSX))
{
// Can we capture stacks or do a gcdump instead?
return Task.CompletedTask;
}
return RunAsync($"{Environment.GetEnvironmentVariable("HELIX_WORKITEM_ROOT")}/dotnet-dump", $"collect -p {pid} -o \"{dumpFilePath}\"");
}
public static async Task<ProcessResult> RunAsync(
string filename,
string arguments,
string? workingDirectory = null,
string dumpDirectoryPath = null,
bool throwOnError = true,
IDictionary<string, string?>? environmentVariables = null,
Action<string>? outputDataReceived = null,
@ -51,6 +95,14 @@ namespace RunTests
process.StartInfo.WorkingDirectory = workingDirectory;
}
dumpDirectoryPath ??= Environment.GetEnvironmentVariable("HELIX_DUMP_FOLDER");
if (dumpDirectoryPath != null)
{
process.StartInfo.EnvironmentVariables["COMPlus_DbgEnableMiniDump"] = "1";
process.StartInfo.EnvironmentVariables["COMPlus_DbgMiniDumpName"] = Path.Combine(dumpDirectoryPath, $"{Path.GetFileName(filename)}.%d.dmp");
}
if (environmentVariables != null)
{
foreach (var kvp in environmentVariables)
@ -112,13 +164,20 @@ namespace RunTests
process.BeginOutputReadLine();
process.BeginErrorReadLine();
var cancelledTcs = new TaskCompletionSource<object?>();
await using var _ = cancellationToken.Register(() => cancelledTcs.TrySetResult(null));
var canceledTcs = new TaskCompletionSource<object?>();
await using var _ = cancellationToken.Register(() => canceledTcs.TrySetResult(null));
var result = await Task.WhenAny(processLifetimeTask.Task, cancelledTcs.Task);
var result = await Task.WhenAny(processLifetimeTask.Task, canceledTcs.Task);
if (result == cancelledTcs.Task)
if (result == canceledTcs.Task)
{
if (dumpDirectoryPath != null)
{
var dumpFilePath = Path.Combine(dumpDirectoryPath, $"{Path.GetFileName(filename)}.{process.Id}.dmp");
// Capture a process dump if the dumpDirectory is set
await CaptureDumpAsync(process.Id, dumpFilePath);
}
if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
{
sys_kill(process.Id, sig: 2); // SIGINT
@ -143,16 +202,5 @@ namespace RunTests
return await processLifetimeTask.Task;
}
public static void KillProcess(int pid)
{
try
{
using var process = Process.GetProcessById(pid);
process?.Kill();
}
catch (ArgumentException) { }
catch (InvalidOperationException) { }
}
}
}

View File

@ -2,10 +2,6 @@
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
using System;
using System.Collections.Generic;
using System.CommandLine;
using System.IO;
using System.Runtime.InteropServices;
using System.Threading.Tasks;
namespace RunTests
@ -14,7 +10,7 @@ namespace RunTests
{
static async Task Main(string[] args)
{
try
try
{
var runner = new TestRunner(RunTestsOptions.Parse(args));
@ -27,6 +23,10 @@ namespace RunTests
{
keepGoing = runner.InstallAspNetRefIfNeeded();
}
if (keepGoing)
{
keepGoing = await runner.InstallDotnetDump();
}
runner.DisplayContents();

View File

@ -50,16 +50,21 @@ namespace RunTests
aliases: new string[] { "--ef" },
description: "The version of the EF tool to use")
{ Argument = new Argument<string>(), Required = true },
new Option(
aliases: new string[] { "--aspnetruntime" },
description: "The path to the aspnet runtime nupkg to install")
{ Argument = new Argument<string>(), Required = true },
new Option(
aliases: new string[] { "--aspnetref" },
description: "The path to the aspnet ref nupkg to install")
{ Argument = new Argument<string>(), Required = true },
new Option(
aliases: new string[] { "--helixTimeout" },
description: "The timeout duration of the Helix job")
{ Argument = new Argument<string>(), Required = true },
};
var parseResult = command.Parse(args);
@ -73,6 +78,7 @@ namespace RunTests
options.EfVersion = parseResult.ValueForOption<string>("--ef");
options.AspNetRuntime = parseResult.ValueForOption<string>("--aspnetruntime");
options.AspNetRef = parseResult.ValueForOption<string>("--aspnetref");
options.Timeout = TimeSpan.Parse(parseResult.ValueForOption<string>("--helixTimeout"));
options.HELIX_WORKITEM_ROOT = Environment.GetEnvironmentVariable("HELIX_WORKITEM_ROOT");
options.Path = Environment.GetEnvironmentVariable("PATH");
options.DotnetRoot = Environment.GetEnvironmentVariable("DOTNET_ROOT");
@ -91,5 +97,6 @@ namespace RunTests
public string HELIX_WORKITEM_ROOT { get; set;}
public string DotnetRoot { get; set; }
public string Path { get; set; }
public TimeSpan Timeout { get; set; }
}
}

View File

@ -3,10 +3,10 @@
using System;
using System.Collections.Generic;
using System.CommandLine;
using System.IO;
using System.IO.Compression;
using System.Runtime.InteropServices;
using System.Threading;
using System.Threading.Tasks;
namespace RunTests
@ -24,7 +24,7 @@ namespace RunTests
public bool SetupEnvironment()
{
try
try
{
// Rename default.NuGet.config to NuGet.config if there is not a custom one from the project
// We use a local NuGet.config file to avoid polluting global machine state and avoid relying on global machine state
@ -32,7 +32,7 @@ namespace RunTests
{
File.Copy("default.NuGet.config", "NuGet.config");
}
EnvironmentVariables.Add("PATH", Options.Path);
EnvironmentVariables.Add("DOTNET_ROOT", Options.DotnetRoot);
EnvironmentVariables.Add("helix", Options.HelixQueue);
@ -68,7 +68,7 @@ namespace RunTests
public void DisplayContents(string path = "./")
{
try
try
{
Console.WriteLine();
Console.WriteLine($"Displaying directory contents for {path}:");
@ -88,9 +88,9 @@ namespace RunTests
}
}
public async Task<bool> InstallAspNetAppIfNeededAsync()
public async Task<bool> InstallAspNetAppIfNeededAsync()
{
try
try
{
if (File.Exists(Options.AspNetRuntime))
{
@ -113,7 +113,7 @@ namespace RunTests
}
}
}
DisplayContents(appRuntimePath);
Console.WriteLine($"Adding current directory to nuget sources: {Options.HELIX_WORKITEM_ROOT}");
@ -152,7 +152,7 @@ namespace RunTests
Options.Path += $"{Environment.GetEnvironmentVariable("DOTNET_CLI_HOME")}/.dotnet/tools";
EnvironmentVariables["PATH"] = Options.Path;
}
else
else
{
Console.WriteLine($"No AspNetRuntime found: {Options.AspNetRuntime}, skipping...");
}
@ -165,19 +165,19 @@ namespace RunTests
}
}
public bool InstallAspNetRefIfNeeded()
public bool InstallAspNetRefIfNeeded()
{
try
try
{
if (File.Exists(Options.AspNetRef))
{
var refPath = $"Microsoft.AspNetCore.App.Ref";
Console.WriteLine($"Found AspNetRef: {Options.AspNetRef}, extracting to {refPath}");
ZipFile.ExtractToDirectory(Options.AspNetRef, "Microsoft.AspNetCore.App.Ref");
DisplayContents(refPath);
}
else
else
{
Console.WriteLine($"No AspNetRef found: {Options.AspNetRef}, skipping...");
}
@ -189,7 +189,28 @@ namespace RunTests
return false;
}
}
public async Task<bool> InstallDotnetDump()
{
try
{
await ProcessUtil.RunAsync($"{Options.DotnetRoot}/dotnet",
$"tool install dotnet-dump --tool-path {Options.HELIX_WORKITEM_ROOT} " +
"--version 5.0.0-* --add-source https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet5/nuget/v3/index.json",
environmentVariables: EnvironmentVariables,
outputDataReceived: Console.WriteLine,
errorDataReceived: Console.Error.WriteLine,
throwOnError: false);
return true;
}
catch (Exception e)
{
Console.WriteLine($"Exception in InstallDotnetDump: {e}");
return false;
}
}
public async Task<bool> CheckTestDiscoveryAsync()
{
try
@ -197,7 +218,8 @@ namespace RunTests
// Run test discovery so we know if there are tests to run
var discoveryResult = await ProcessUtil.RunAsync($"{Options.DotnetRoot}/dotnet",
$"vstest {Options.Target} -lt",
environmentVariables: EnvironmentVariables);
environmentVariables: EnvironmentVariables,
cancellationToken: new CancellationTokenSource(TimeSpan.FromMinutes(2)).Token);
if (discoveryResult.StandardOutput.Contains("Exception thrown"))
{
@ -217,8 +239,10 @@ namespace RunTests
public async Task<int> RunTestsAsync()
{
var exitCode = 0;
try
try
{
// Timeout test run 5 minutes before the Helix job would timeout
var cts = new CancellationTokenSource(Options.Timeout.Subtract(TimeSpan.FromMinutes(5)));
var commonTestArgs = $"vstest {Options.Target} --logger:xunit --logger:\"console;verbosity=normal\" --blame";
if (Options.Quarantined)
{
@ -230,7 +254,8 @@ namespace RunTests
environmentVariables: EnvironmentVariables,
outputDataReceived: Console.WriteLine,
errorDataReceived: Console.Error.WriteLine,
throwOnError: false);
throwOnError: false,
cancellationToken: cts.Token);
if (result.ExitCode != 0)
{
@ -247,7 +272,8 @@ namespace RunTests
environmentVariables: EnvironmentVariables,
outputDataReceived: Console.WriteLine,
errorDataReceived: Console.Error.WriteLine,
throwOnError: false);
throwOnError: false,
cancellationToken: cts.Token);
if (result.ExitCode != 0)
{

View File

@ -3,9 +3,18 @@ REM Need delayed expansion !PATH! so parens in the path don't mess up the parens
setlocal enabledelayedexpansion
REM Use '$' as a variable name prefix to avoid MSBuild variable collisions with these variables
set $target=%1
set $sdkVersion=%2
set $runtimeVersion=%3
set $queue=%4
set $arch=%5
set $quarantined=%6
set $ef=%7
set $aspnetruntime=%8
set $aspnetref=%9
REM Batch only supports up to 9 arguments using the %# syntax, need to shift to get more
shift
set $helixTimeout=%9
set DOTNET_HOME=%HELIX_CORRELATION_PAYLOAD%\sdk
set DOTNET_ROOT=%DOTNET_HOME%\%$arch%
@ -23,10 +32,11 @@ powershell.exe -NoProfile -ExecutionPolicy unrestricted -Command "[Net.ServicePo
set exit_code=0
echo "Restore: dotnet restore RunTests\RunTests.csproj --source https://api.nuget.org/v3/index.json --ignore-failed-sources..."
dotnet restore RunTests\RunTests.csproj --source https://api.nuget.org/v3/index.json --ignore-failed-sources
echo "Running tests: dotnet run --project RunTests\RunTests.csproj -- --target %1 --sdk %2 --runtime %3 --queue %4 --arch %5 --quarantined %6 --ef %7 --aspnetruntime %8 --aspnetref %9..."
dotnet run --project RunTests\RunTests.csproj -- --target %1 --sdk %2 --runtime %3 --queue %4 --arch %5 --quarantined %6 --ef %7 --aspnetruntime %8 --aspnetref %9
if errorlevel 1 (
set exit_code=1
echo "Running tests: dotnet run --project RunTests\RunTests.csproj -- --target %$target% --sdk %$sdkVersion% --runtime %$runtimeVersion% --queue %$queue% --arch %$arch% --quarantined %$quarantined% --ef %$ef% --aspnetruntime %$aspnetruntime% --aspnetref %$aspnetref% --helixTimeout %$helixTimeout%..."
dotnet run --project RunTests\RunTests.csproj -- --target %$target% --sdk %$sdkVersion% --runtime %$runtimeVersion% --queue %$queue% --arch %$arch% --quarantined %$quarantined% --ef %$ef% --aspnetruntime %$aspnetruntime% --aspnetref %$aspnetref% --helixTimeout %$helixTimeout%
if errorlevel neq 0 (
set exit_code=%errorlevel%
)
echo "Finished running tests: exit_code=%exit_code%"
exit /b %exit_code%

View File

@ -88,8 +88,8 @@ sync
exit_code=0
echo "Restore: $DOTNET_ROOT/dotnet restore RunTests/RunTests.csproj --source https://api.nuget.org/v3/index.json --ignore-failed-sources..."
$DOTNET_ROOT/dotnet restore RunTests/RunTests.csproj --source https://api.nuget.org/v3/index.json --ignore-failed-sources
echo "Running tests: $DOTNET_ROOT/dotnet run --project RunTests/RunTests.csproj -- --target $1 --sdk $2 --runtime $3 --queue $4 --arch $5 --quarantined $6 --ef $7 --aspnetruntime $8 --aspnetref $9..."
$DOTNET_ROOT/dotnet run --project RunTests/RunTests.csproj -- --target $1 --sdk $2 --runtime $3 --queue $4 --arch $5 --quarantined $6 --ef $7 --aspnetruntime $8 --aspnetref $9
echo "Running tests: $DOTNET_ROOT/dotnet run --project RunTests/RunTests.csproj -- --target $1 --sdk $2 --runtime $3 --queue $4 --arch $5 --quarantined $6 --ef $7 --aspnetruntime $8 --aspnetref $9 --helixTimeout ${10}..."
$DOTNET_ROOT/dotnet run --project RunTests/RunTests.csproj -- --target $1 --sdk $2 --runtime $3 --queue $4 --arch $5 --quarantined $6 --ef $7 --aspnetruntime $8 --aspnetref $9 --helixTimeout ${10}
exit_code=$?
echo "Finished tests...exit_code=$exit_code"
exit $exit_code

View File

@ -19,11 +19,11 @@
<ItemGroup Condition="'$(IsHelixJob)' == 'true' AND '$(TestDependsOnAspNetRef)' == 'true' AND '$(IsTargetingPackBuilding)' == 'true'">
<HelixContent Include="$(RepoRoot)artifacts\packages\Release\Shipping\Microsoft.AspNetCore.App.Ref.$(AppRuntimeVersion).nupkg" />
</ItemGroup>
<ItemGroup Condition="'$(IsHelixJob)' == 'true' AND '$(TestDependsOnAspNetRuntime)' == 'true'">
<HelixContent Include="$(RepoRoot)artifacts\packages\Release\Shipping\*-ci.nupkg" />
</ItemGroup>
</ItemGroup>
<!-- Item group has to be defined here becasue Helix.props is evaluated before xunit.runner.console.props -->
<ItemGroup Condition="$(BuildHelixPayload)">
<Content Include="@(HelixContent)" />
@ -68,8 +68,8 @@ Usage: dotnet msbuild /t:Helix src/MyTestProject.csproj
<_Temp Include="@(HelixAvailableTargetQueue)" />
<HelixAvailableTargetQueue Remove="@(HelixAvailableTargetQueue)" />
<HelixAvailableTargetQueue Include="@(_Temp->'%(Identity)'->Replace('.Open',''))" />
</ItemGroup>
</ItemGroup>
<ItemGroup>
<!-- Include default queues based on platform -->
<_HelixProjectTargetQueue Include="%(HelixAvailableTargetQueue.Identity)" Condition="'%(HelixAvailableTargetQueue.Identity)' != '' AND '$(_SelectedPlatforms.Contains(%(Platform)))' == 'true'" />
@ -115,8 +115,8 @@ Usage: dotnet msbuild /t:Helix src/MyTestProject.csproj
<TestAssembly>$(TargetFileName)</TestAssembly>
<PreCommands>@(HelixPreCommand)</PreCommands>
<PostCommands>@(HelixPostCommand)</PostCommands>
<Command Condition="$(IsWindowsHelixQueue)">call runtests.cmd $(TargetFileName) $(NETCoreSdkVersion) $(MicrosoftNETCoreAppRuntimeVersion) $(_HelixFriendlyNameTargetQueue) $(TargetArchitecture) $(RunQuarantinedTests) $(DotnetEfPackageVersion) Microsoft.AspNetCore.App.Runtime.win-x64.$(AppRuntimeVersion).nupkg Microsoft.AspNetCore.App.Ref.$(AppRuntimeVersion).nupkg</Command>
<Command Condition="!$(IsWindowsHelixQueue)">./runtests.sh $(TargetFileName) $(NETCoreSdkVersion) $(MicrosoftNETCoreAppRuntimeVersion) $(_HelixFriendlyNameTargetQueue) $(TargetArchitecture) $(RunQuarantinedTests) $(DotnetEfPackageVersion) Microsoft.AspNetCore.App.Runtime.win-x64.$(AppRuntimeVersion).nupkg Microsoft.AspNetCore.App.Ref.$(AppRuntimeVersion).nupkg</Command>
<Command Condition="$(IsWindowsHelixQueue)">call runtests.cmd $(TargetFileName) $(NETCoreSdkVersion) $(MicrosoftNETCoreAppRuntimeVersion) $(_HelixFriendlyNameTargetQueue) $(TargetArchitecture) $(RunQuarantinedTests) $(DotnetEfPackageVersion) Microsoft.AspNetCore.App.Runtime.win-x64.$(AppRuntimeVersion).nupkg Microsoft.AspNetCore.App.Ref.$(AppRuntimeVersion).nupkg $(HelixTimeout)</Command>
<Command Condition="!$(IsWindowsHelixQueue)">./runtests.sh $(TargetFileName) $(NETCoreSdkVersion) $(MicrosoftNETCoreAppRuntimeVersion) $(_HelixFriendlyNameTargetQueue) $(TargetArchitecture) $(RunQuarantinedTests) $(DotnetEfPackageVersion) Microsoft.AspNetCore.App.Runtime.win-x64.$(AppRuntimeVersion).nupkg Microsoft.AspNetCore.App.Ref.$(AppRuntimeVersion).nupkg $(HelixTimeout)</Command>
<Command Condition="$(HelixCommand) != ''">$(HelixCommand)</Command>
<Timeout>$(HelixTimeout)</Timeout>
</HelixWorkItem>