Productionize Path tokenization

This commit is contained in:
Ryan Nowak 2018-06-16 17:20:28 -07:00
parent 6efd679ce3
commit 30fcfb65ea
12 changed files with 380 additions and 71 deletions

View File

@ -0,0 +1,34 @@
// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
namespace Microsoft.AspNetCore.Routing.Matchers
{
/// <summary>
/// Hosts the reference tokenizers that the path tokenizer benchmarks compare against.
/// </summary>
public abstract class FastPathTokenizerBenchmarkBase
{
    /// <summary>
    /// Straightforward <see cref="string.IndexOf(char, int)"/> based tokenizer: writes one
    /// <see cref="PathSegment"/> per slash-delimited piece of <paramref name="path"/>,
    /// stopping once <paramref name="maxCount"/> entries have been written.
    /// </summary>
    /// <param name="path">The path to split. The first character is skipped as the leading slash.</param>
    /// <param name="segments">Destination buffer for the segments.</param>
    /// <param name="maxCount">Upper bound on the number of segments written.</param>
    internal unsafe void NaiveBaseline(string path, PathSegment* segments, int maxCount)
    {
        var written = 0;
        var segmentStart = 1; // skip the leading '/'

        while (true)
        {
            // The search runs before the capacity check, mirroring a single combined condition.
            var slash = path.IndexOf('/', segmentStart);
            if (slash < 0 || written >= maxCount)
            {
                break;
            }

            segments[written] = new PathSegment(segmentStart, slash - segmentStart);
            written++;
            segmentStart = slash + 1; // continue just past the slash that was found
        }

        // Whatever follows the last slash forms the final segment, if it is non-empty.
        var remaining = path.Length - segmentStart;
        if (remaining > 0 && written < maxCount)
        {
            segments[written] = new PathSegment(segmentStart, remaining);
        }
    }

    /// <summary>
    /// Writes exactly one segment that covers everything after the first character of
    /// <paramref name="path"/>. No searching is performed and <paramref name="maxCount"/> is not consulted.
    /// </summary>
    internal unsafe void MinimalBaseline(string path, PathSegment* segments, int maxCount)
    {
        const int afterLeadingSlash = 1;
        segments[0] = new PathSegment(afterLeadingSlash, path.Length - afterLeadingSlash);
    }
}
}

View File

@ -0,0 +1,32 @@
// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
using BenchmarkDotNet.Attributes;
namespace Microsoft.AspNetCore.Routing.Matchers
{
/// <summary>
/// Compares the tokenizer against the minimal baseline for the root path "/".
/// </summary>
public class FastPathTokenizerEmptyBenchmark : FastPathTokenizerBenchmarkBase
{
    private const int MaxCount = 32;

    private static readonly string Input = "/";

    // This is a fully hardcoded implementation used for comparison; we don't expect to do better.
    [Benchmark(Baseline = true)]
    public unsafe void Baseline()
    {
        PathSegment* buffer = stackalloc PathSegment[MaxCount];
        MinimalBaseline(Input, buffer, MaxCount);
    }

    // The tokenizer under measurement, run on the same input and buffer size.
    [Benchmark]
    public unsafe void Implementation()
    {
        PathSegment* buffer = stackalloc PathSegment[MaxCount];
        FastPathTokenizer.Tokenize(Input, buffer, MaxCount);
    }
}
}

View File

@ -0,0 +1,36 @@
// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
using BenchmarkDotNet.Attributes;
namespace Microsoft.AspNetCore.Routing.Matchers
{
/// <summary>
/// Compares the tokenizer against the naive baseline for a long path with many segments.
/// </summary>
public class FastPathTokenizerLargeBenchmark : FastPathTokenizerBenchmarkBase
{
    // A compile-time constant, matching the other tokenizer benchmarks, so the
    // stackalloc size is fixed at compile time rather than read from a static field.
    private const int MaxCount = 32;

    private static readonly string Input =
        "/heeeeeeeeeeyyyyyyyyyyy/this/is/a/string/with/lots/of/segments" +
        "/hoooooooooooooooooooooooooooooooooow long/do you think it should be?/I think" +
        "/like/32/segments/is /a/goood/number/dklfl/20303/dlflkf" +
        "/Im/tired/of/thinking/of/more/things/to/so";

    // This is a naive reference implementation. We expect to do better.
    [Benchmark(Baseline = true)]
    public unsafe void Baseline()
    {
        var path = Input;
        var segments = stackalloc PathSegment[MaxCount];
        NaiveBaseline(path, segments, MaxCount);
    }

    // The tokenizer under measurement, run on the same input and buffer size.
    [Benchmark]
    public unsafe void Implementation()
    {
        var path = Input;
        var segments = stackalloc PathSegment[MaxCount];
        FastPathTokenizer.Tokenize(path, segments, MaxCount);
    }
}
}

View File

@ -0,0 +1,32 @@
// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
using BenchmarkDotNet.Attributes;
namespace Microsoft.AspNetCore.Routing.Matchers
{
/// <summary>
/// Compares the tokenizer against the minimal baseline for the single-segment path "/plaintext".
/// </summary>
public class FastPathTokenizerPlaintextBenchmark : FastPathTokenizerBenchmarkBase
{
    private const int MaxCount = 32;

    private static readonly string Input = "/plaintext";

    // This is a fully hardcoded implementation used for comparison; we don't expect to do better.
    [Benchmark(Baseline = true)]
    public unsafe void Baseline()
    {
        PathSegment* buffer = stackalloc PathSegment[MaxCount];
        MinimalBaseline(Input, buffer, MaxCount);
    }

    // The tokenizer under measurement, run on the same input and buffer size.
    [Benchmark]
    public unsafe void Implementation()
    {
        PathSegment* buffer = stackalloc PathSegment[MaxCount];
        FastPathTokenizer.Tokenize(Input, buffer, MaxCount);
    }
}
}

View File

@ -0,0 +1,32 @@
// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
using BenchmarkDotNet.Attributes;
namespace Microsoft.AspNetCore.Routing.Matchers
{
/// <summary>
/// Compares the tokenizer against the naive baseline for the short path "/hello/world/cool".
/// </summary>
public class FastPathTokenizerSmallBenchmark : FastPathTokenizerBenchmarkBase
{
    private const int MaxCount = 32;

    private static readonly string Input = "/hello/world/cool";

    // Naive reference implementation; the real tokenizer is expected to outperform it.
    [Benchmark(Baseline = true)]
    public unsafe void Baseline()
    {
        PathSegment* buffer = stackalloc PathSegment[MaxCount];
        NaiveBaseline(Input, buffer, MaxCount);
    }

    // The tokenizer under measurement, run on the same input and buffer size.
    [Benchmark]
    public unsafe void Implementation()
    {
        PathSegment* buffer = stackalloc PathSegment[MaxCount];
        FastPathTokenizer.Tokenize(Input, buffer, MaxCount);
    }
}
}

View File

@ -1,8 +1,7 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFrameworks>netcoreapp2.0;net461</TargetFrameworks>
<TargetFrameworks Condition=" '$(OS)' != 'Windows_NT' ">netcoreapp2.0</TargetFrameworks>
<TargetFrameworks>netcoreapp2.2</TargetFrameworks>
<OutputType>Exe</OutputType>
<ServerGarbageCollection>true</ServerGarbageCollection>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>

View File

@ -0,0 +1,40 @@
// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
using System;
namespace Microsoft.AspNetCore.Routing.Matchers
{
// Low level implementation of our path tokenization algorithm. Alternative
// to PathTokenizer.
internal static class FastPathTokenizer
{
    // Tokenizes the path by recording, for each segment, the index at which it starts
    // and the length of the text up to the next slash.
    //
    // If there is residue (text after the last slash) then the length of that segment
    // is computed from the remaining string length. Empty residue (a trailing slash)
    // does not produce a segment.
    //
    // Parameters:
    //   path     - the path to tokenize; its first character is skipped as the leading slash.
    //   segments - destination buffer; the caller must provide room for at least maxCount entries.
    //   maxCount - the maximum number of segments to write.
    //
    // Returns the number of segments written, which never exceeds maxCount. A null or
    // empty path yields zero segments.
    public static unsafe int Tokenize(string path, PathSegment* segments, int maxCount)
    {
        // There is no leading slash to skip in a null or empty path, and slicing at
        // index 1 would throw, so report that nothing was found.
        if (string.IsNullOrEmpty(path))
        {
            return 0;
        }

        int count = 0;
        int start = 1; // Paths always start with a leading /
        int end;
        var span = path.AsSpan(start);

        // Capacity is checked first so that a full buffer does not pay for another search.
        while (count < maxCount && (end = span.IndexOf('/')) >= 0)
        {
            // 'end' is relative to 'span', so it is also the length of this segment.
            segments[count++] = new PathSegment(start, end);
            start += end + 1; // resume search after the current character
            span = span.Slice(end + 1);
        }

        // Residue
        var length = span.Length;
        if (length > 0 && count < maxCount)
        {
            segments[count++] = new PathSegment(start, length);
        }

        return count;
    }
}
}

View File

@ -0,0 +1,39 @@
// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
using System;
namespace Microsoft.AspNetCore.Routing.Matchers
{
/// <summary>
/// Identifies a piece of a path by the index at which it starts and its length.
/// </summary>
internal readonly struct PathSegment : IEquatable<PathSegment>
{
    /// <summary>The start index supplied at construction.</summary>
    public readonly int Start;

    /// <summary>The length supplied at construction.</summary>
    public readonly int Length;

    /// <summary>
    /// Creates a segment with the given <paramref name="start"/> and <paramref name="length"/>.
    /// </summary>
    public PathSegment(int start, int length)
    {
        Start = start;
        Length = length;
    }

    /// <summary>
    /// Two segments are equal when both <see cref="Start"/> and <see cref="Length"/> match.
    /// </summary>
    public bool Equals(PathSegment other) => Start == other.Start && Length == other.Length;

    /// <summary>
    /// Returns <c>true</c> only when <paramref name="obj"/> is a <see cref="PathSegment"/>
    /// equal to this one.
    /// </summary>
    public override bool Equals(object obj) => obj is PathSegment other && Equals(other);

    /// <summary>
    /// Uses <see cref="Start"/> as the hash code.
    /// </summary>
    public override int GetHashCode() => Start;

    /// <summary>
    /// Formats the segment as <c>Segment(start:length)</c>.
    /// </summary>
    public override string ToString() => $"Segment({Start}:{Length})";
}
}

View File

@ -8,6 +8,7 @@ Microsoft.AspNetCore.Routing.RouteCollection</Description>
<NoWarn>$(NoWarn);CS1591</NoWarn>
<GenerateDocumentationFile>true</GenerateDocumentationFile>
<PackageTags>aspnetcore;routing</PackageTags>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
</PropertyGroup>
<ItemGroup>

View File

@ -3,7 +3,6 @@
using System;
using System.Collections.Generic;
using System.Collections.Specialized;
using System.Threading.Tasks;
using Microsoft.AspNetCore.Http;
@ -34,33 +33,12 @@ namespace Microsoft.AspNetCore.Routing.Matchers
var current = 0;
var path = httpContext.Request.Path.Value;
// This section tokenizes the path by marking the sequence of slashes, and their
// position in the string. The consuming code uses the sequence and the count of
// slashes to deduce the length of each segment.
//
// If there is residue (text after last slash) then the length of the segment will
// computed based on the string length.
var buffer = stackalloc Segment[32];
var segment = 0;
var start = 1; // PathString guarantees a leading /
var end = 0;
var length = 0;
while ((end = path.IndexOf('/', start)) >= 0 && segment < 32)
var buffer = stackalloc PathSegment[32];
var count = FastPathTokenizer.Tokenize(path, buffer, 32);
for (var i = 0; i < count; i++)
{
length = end - start;
buffer[segment++] = new Segment() { Start = start, Length = length, };
current = states[current].Transitions.GetDestination(path, start, length);
start = end + 1; // resume search after the current character
}
// Residue
length = path.Length - start;
if (length > 0)
{
buffer[segment++] = new Segment() { Start = start, Length = length, };
current = states[current].Transitions.GetDestination(path, start, length);
current = states[current].Transitions.GetDestination(buffer, i, path);
}
var matches = new List<(Endpoint, RouteValueDictionary)>();
@ -98,12 +76,6 @@ namespace Microsoft.AspNetCore.Routing.Matchers
return Task.CompletedTask;
}
public struct Segment
{
public int Start;
public int Length;
}
public struct State
{
public bool IsAccepting;
@ -119,7 +91,7 @@ namespace Microsoft.AspNetCore.Routing.Matchers
public abstract class JumpTable
{
public abstract int GetDestination(string text, int start, int length);
public unsafe abstract int GetDestination(PathSegment* segments, int depth, string path);
}
public class JumpTableBuilder
@ -154,17 +126,18 @@ namespace Microsoft.AspNetCore.Routing.Matchers
_entries = entries;
}
public override int GetDestination(string text, int start, int length)
public unsafe override int GetDestination(PathSegment* segments, int depth, string path)
{
for (var i = 0; i < _entries.Length; i++)
{
if (length == _entries[i].text.Length &&
var segment = segments[depth];
if (segment.Length == _entries[i].text.Length &&
string.Compare(
text,
start,
path,
segment.Start,
_entries[i].text,
0,
length,
segment.Length,
StringComparison.OrdinalIgnoreCase) == 0)
{
return _entries[i].destination;

View File

@ -0,0 +1,117 @@
// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
using Xunit;
namespace Microsoft.AspNetCore.Routing.Matchers
{
// Unit tests for FastPathTokenizer.Tokenize. Every test hands the tokenizer a
// stack-allocated buffer of 32 entries together with an explicit segment limit.
public unsafe class FastPathTokenizerTest
{
    [Fact] // Note: tokenizing a truly empty string is undefined.
    public void Tokenize_EmptyPath()
    {
        // Arrange
        PathSegment* buffer = stackalloc PathSegment[32];

        // Act
        int actual = FastPathTokenizer.Tokenize("/", buffer, 1);

        // Assert
        Assert.Equal(0, actual);
    }

    [Fact]
    public void Tokenize_SingleSegment()
    {
        // Arrange
        PathSegment* buffer = stackalloc PathSegment[32];

        // Act
        int actual = FastPathTokenizer.Tokenize("/abc", buffer, 1);

        // Assert
        Assert.Equal(1, actual);
        Assert.Equal(new PathSegment(1, 3), buffer[0]);
    }

    [Fact]
    public void Tokenize_WithSomeSegments()
    {
        // Arrange
        PathSegment* buffer = stackalloc PathSegment[32];

        // Act
        int actual = FastPathTokenizer.Tokenize("/a/b/c", buffer, 3);

        // Assert
        Assert.Equal(3, actual);
        Assert.Equal(new PathSegment(1, 1), buffer[0]);
        Assert.Equal(new PathSegment(3, 1), buffer[1]);
        Assert.Equal(new PathSegment(5, 1), buffer[2]);
    }

    [Fact] // Empty trailing / is ignored
    public void Tokenize_WithSomeSegments_TrailingSlash()
    {
        // Arrange
        PathSegment* buffer = stackalloc PathSegment[32];

        // Act
        int actual = FastPathTokenizer.Tokenize("/a/b/c/", buffer, 3);

        // Assert
        Assert.Equal(3, actual);
        Assert.Equal(new PathSegment(1, 1), buffer[0]);
        Assert.Equal(new PathSegment(3, 1), buffer[1]);
        Assert.Equal(new PathSegment(5, 1), buffer[2]);
    }

    [Fact]
    public void Tokenize_LongerSegments()
    {
        // Arrange
        PathSegment* buffer = stackalloc PathSegment[32];

        // Act
        int actual = FastPathTokenizer.Tokenize("/aaa/bb/ccccc", buffer, 3);

        // Assert
        Assert.Equal(3, actual);
        Assert.Equal(new PathSegment(1, 3), buffer[0]);
        Assert.Equal(new PathSegment(5, 2), buffer[1]);
        Assert.Equal(new PathSegment(8, 5), buffer[2]);
    }

    [Fact] // Consecutive slashes produce zero-length segments
    public void Tokenize_EmptySegments()
    {
        // Arrange
        PathSegment* buffer = stackalloc PathSegment[32];

        // Act
        int actual = FastPathTokenizer.Tokenize("///c", buffer, 3);

        // Assert
        Assert.Equal(3, actual);
        Assert.Equal(new PathSegment(1, 0), buffer[0]);
        Assert.Equal(new PathSegment(2, 0), buffer[1]);
        Assert.Equal(new PathSegment(3, 1), buffer[2]);
    }

    [Fact] // Segments beyond the requested limit are not reported
    public void Tokenize_TooManySegments()
    {
        // Arrange
        PathSegment* buffer = stackalloc PathSegment[32];

        // Act
        int actual = FastPathTokenizer.Tokenize("/a/b/c/d", buffer, 3);

        // Assert
        Assert.Equal(3, actual);
        Assert.Equal(new PathSegment(1, 1), buffer[0]);
        Assert.Equal(new PathSegment(3, 1), buffer[1]);
        Assert.Equal(new PathSegment(5, 1), buffer[2]);
    }
}
}

View File

@ -39,28 +39,8 @@ namespace Microsoft.AspNetCore.Routing.Matchers
var state = _state;
var path = httpContext.Request.Path.Value;
// This section tokenizes the path by marking the sequence of slashes, and their
// position in the string. The consuming code uses the sequence and the count of
// slashes to deduce the length of each segment.
//
// If there is residue (text after last slash) then the length of the segment will
// computed based on the string length.
var buffer = stackalloc Segment[32];
var count = 0;
var start = 1; // PathString guarantees a leading /
var end = 0;
while ((end = path.IndexOf('/', start)) >= 0 && count < 32)
{
buffer[count++] = new Segment() { Start = start, Length = end - start, };
start = end + 1; // resume search after the current character
}
// Residue
if (start < path.Length)
{
buffer[count++] = new Segment() { Start = start, Length = path.Length - start, };
}
var buffer = stackalloc PathSegment[32];
var count = FastPathTokenizer.Tokenize(path, buffer, 32);
var i = 0;
var candidates = new List<Candidate>();
@ -125,12 +105,6 @@ namespace Microsoft.AspNetCore.Routing.Matchers
return Task.CompletedTask;
}
public struct Segment
{
public int Start;
public int Length;
}
public struct Candidate
{
public Endpoint Endpoint;
@ -173,7 +147,7 @@ namespace Microsoft.AspNetCore.Routing.Matchers
public abstract class JumpTable
{
public unsafe abstract int GetDestination(Segment* segments, int count, string path);
public unsafe abstract int GetDestination(PathSegment* segments, int depth, string path);
}
public class JumpTableBuilder
@ -208,7 +182,7 @@ namespace Microsoft.AspNetCore.Routing.Matchers
_entries = entries;
}
public unsafe override int GetDestination(Segment* segments, int count, string path)
public unsafe override int GetDestination(PathSegment* segments, int count, string path)
{
if (_depth == count)
{