Productionize Path tokenization
This commit is contained in:
parent
6efd679ce3
commit
30fcfb65ea
|
|
@ -0,0 +1,34 @@
|
|||
// Copyright (c) .NET Foundation. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
namespace Microsoft.AspNetCore.Routing.Matchers
|
||||
{
|
||||
public abstract class FastPathTokenizerBenchmarkBase
{
    // Reference tokenizer built on string.IndexOf. Writes one PathSegment per
    // '/'-delimited piece of 'path' into 'segments', never more than 'maxCount'.
    internal unsafe void NaiveBaseline(string path, PathSegment* segments, int maxCount)
    {
        var written = 0;
        var segmentStart = 1; // Index 0 holds the leading '/', so scanning begins at 1.

        while (true)
        {
            var slash = path.IndexOf('/', segmentStart);
            if (slash < 0 || written >= maxCount)
            {
                break;
            }

            segments[written] = new PathSegment(segmentStart, slash - segmentStart);
            written++;
            segmentStart = slash + 1; // Continue just past the slash that was found.
        }

        // Whatever follows the final slash forms one last segment.
        var remaining = path.Length - segmentStart;
        if (remaining > 0 && written < maxCount)
        {
            segments[written] = new PathSegment(segmentStart, remaining);
        }
    }

    // Cheapest possible stand-in for tokenization: records everything after the
    // first character as a single segment in segments[0]. 'maxCount' is not consulted.
    internal unsafe void MinimalBaseline(string path, PathSegment* segments, int maxCount)
    {
        segments[0] = new PathSegment(1, path.Length - 1);
    }
}
|
||||
}
|
||||
|
|
@ -0,0 +1,32 @@
|
|||
// Copyright (c) .NET Foundation. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using BenchmarkDotNet.Attributes;
|
||||
|
||||
namespace Microsoft.AspNetCore.Routing.Matchers
|
||||
{
|
||||
public class FastPathTokenizerEmptyBenchmark : FastPathTokenizerBenchmarkBase
{
    private const int MaxCount = 32;
    private static readonly string Input = "/";

    // Hardcoded minimal implementation used for comparison; we don't expect to do better.
    [Benchmark(Baseline = true)]
    public unsafe void Baseline()
    {
        string text = Input;
        PathSegment* buffer = stackalloc PathSegment[MaxCount];

        MinimalBaseline(text, buffer, MaxCount);
    }

    // Measures the production tokenizer against the same input.
    [Benchmark]
    public unsafe void Implementation()
    {
        string text = Input;
        PathSegment* buffer = stackalloc PathSegment[MaxCount];

        FastPathTokenizer.Tokenize(text, buffer, MaxCount);
    }
}
|
||||
}
|
||||
|
|
@ -0,0 +1,36 @@
|
|||
// Copyright (c) .NET Foundation. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using BenchmarkDotNet.Attributes;
|
||||
|
||||
namespace Microsoft.AspNetCore.Routing.Matchers
|
||||
{
|
||||
public class FastPathTokenizerLargeBenchmark : FastPathTokenizerBenchmarkBase
{
    // Declared as a constant (like the other tokenizer benchmarks) so every
    // benchmark class stack-allocates a buffer of compile-time-known size.
    private const int MaxCount = 32;

    // A long path with many segments of varying length.
    private static readonly string Input =
        "/heeeeeeeeeeyyyyyyyyyyy/this/is/a/string/with/lots/of/segments" +
        "/hoooooooooooooooooooooooooooooooooow long/do you think it should be?/I think" +
        "/like/32/segments/is /a/goood/number/dklfl/20303/dlflkf" +
        "/Im/tired/of/thinking/of/more/things/to/so";

    // This is a naive reference implementation. We expect to do better.
    [Benchmark(Baseline = true)]
    public unsafe void Baseline()
    {
        var path = Input;
        var segments = stackalloc PathSegment[MaxCount];

        NaiveBaseline(path, segments, MaxCount);
    }

    // Measures the production tokenizer against the same input.
    [Benchmark]
    public unsafe void Implementation()
    {
        var path = Input;
        var segments = stackalloc PathSegment[MaxCount];

        FastPathTokenizer.Tokenize(path, segments, MaxCount);
    }
}
|
||||
}
|
||||
|
|
@ -0,0 +1,32 @@
|
|||
// Copyright (c) .NET Foundation. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using BenchmarkDotNet.Attributes;
|
||||
|
||||
namespace Microsoft.AspNetCore.Routing.Matchers
|
||||
{
|
||||
public class FastPathTokenizerPlaintextBenchmark : FastPathTokenizerBenchmarkBase
{
    private const int MaxCount = 32;
    private static readonly string Input = "/plaintext";

    // Hardcoded minimal implementation used for comparison; we don't expect to do better.
    [Benchmark(Baseline = true)]
    public unsafe void Baseline()
    {
        string text = Input;
        PathSegment* buffer = stackalloc PathSegment[MaxCount];

        MinimalBaseline(text, buffer, MaxCount);
    }

    // Measures the production tokenizer against the same input.
    [Benchmark]
    public unsafe void Implementation()
    {
        string text = Input;
        PathSegment* buffer = stackalloc PathSegment[MaxCount];

        FastPathTokenizer.Tokenize(text, buffer, MaxCount);
    }
}
|
||||
}
|
||||
|
|
@ -0,0 +1,32 @@
|
|||
// Copyright (c) .NET Foundation. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using BenchmarkDotNet.Attributes;
|
||||
|
||||
namespace Microsoft.AspNetCore.Routing.Matchers
|
||||
{
|
||||
public class FastPathTokenizerSmallBenchmark : FastPathTokenizerBenchmarkBase
{
    private const int MaxCount = 32;
    private static readonly string Input = "/hello/world/cool";

    // Naive reference implementation; the production tokenizer is expected to beat it.
    [Benchmark(Baseline = true)]
    public unsafe void Baseline()
    {
        string text = Input;
        PathSegment* buffer = stackalloc PathSegment[MaxCount];

        NaiveBaseline(text, buffer, MaxCount);
    }

    // Measures the production tokenizer against the same input.
    [Benchmark]
    public unsafe void Implementation()
    {
        string text = Input;
        PathSegment* buffer = stackalloc PathSegment[MaxCount];

        FastPathTokenizer.Tokenize(text, buffer, MaxCount);
    }
}
|
||||
}
|
||||
|
|
@ -1,8 +1,7 @@
|
|||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<TargetFrameworks>netcoreapp2.0;net461</TargetFrameworks>
|
||||
<TargetFrameworks Condition=" '$(OS)' != 'Windows_NT' ">netcoreapp2.0</TargetFrameworks>
|
||||
<TargetFrameworks>netcoreapp2.2</TargetFrameworks>
|
||||
<OutputType>Exe</OutputType>
|
||||
<ServerGarbageCollection>true</ServerGarbageCollection>
|
||||
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
|
||||
|
|
|
|||
|
|
@ -0,0 +1,40 @@
|
|||
// Copyright (c) .NET Foundation. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System;
|
||||
|
||||
namespace Microsoft.AspNetCore.Routing.Matchers
|
||||
{
|
||||
// Low level implementation of our path tokenization algorithm. Alternative
|
||||
// to PathTokenizer.
|
||||
internal static class FastPathTokenizer
{
    // Tokenizes the path by locating each '/' and recording, for every segment, its
    // start index in the string and the length of the text up to the next slash.
    //
    // If there is residue (text after the last slash) then it becomes a final segment
    // whose length is computed from what remains of the string. Empty residue (a
    // trailing slash) produces no segment.
    //
    // path:     scanning starts at index 1, i.e. the first character is skipped as the
    //           leading '/'. An empty string is not supported.
    // segments: caller-provided buffer; entries [0, maxCount) may be written.
    // maxCount: maximum number of segments to record; further segments are dropped.
    //
    // Returns the number of segments written to 'segments'.
    public static unsafe int Tokenize(string path, PathSegment* segments, int maxCount)
    {
        int count = 0;
        int start = 1; // Paths always start with a leading /
        var span = path.AsSpan(start);

        // Capacity is checked before searching so a full buffer never pays for
        // another scan of the remaining text.
        while (count < maxCount)
        {
            int end = span.IndexOf('/');
            if (end < 0)
            {
                break;
            }

            // 'end' is relative to 'span', so it is exactly the segment length.
            segments[count++] = new PathSegment(start, end);

            // Resume the search after the slash that was just found.
            start += end + 1;
            span = span.Slice(end + 1);
        }

        // Residue
        var length = span.Length;
        if (length > 0 && count < maxCount)
        {
            segments[count++] = new PathSegment(start, length);
        }

        return count;
    }
}
|
||||
}
|
||||
|
|
@ -0,0 +1,39 @@
|
|||
// Copyright (c) .NET Foundation. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System;
|
||||
|
||||
namespace Microsoft.AspNetCore.Routing.Matchers
|
||||
{
|
||||
// A segment of a path, described by where it starts in the path string and how
// many characters it spans. Two segments are equal when both values match.
internal readonly struct PathSegment : IEquatable<PathSegment>
{
    // Index of the segment's first character.
    public readonly int Start;

    // Number of characters in the segment.
    public readonly int Length;

    public PathSegment(int start, int length)
    {
        Start = start;
        Length = length;
    }

    // Returns true only when 'obj' is a PathSegment with the same Start and Length.
    public override bool Equals(object obj)
    {
        return obj is PathSegment segment && Equals(segment);
    }

    public bool Equals(PathSegment other)
    {
        return Start == other.Start && Length == other.Length;
    }

    // Mixes both fields that participate in Equals, so segments that share a
    // start index but differ in length do not all land on the same hash code.
    public override int GetHashCode()
    {
        unchecked
        {
            return (Start * 397) ^ Length;
        }
    }

    public override string ToString()
    {
        return $"Segment({Start}:{Length})";
    }
}
|
||||
}
|
||||
|
|
@ -8,6 +8,7 @@ Microsoft.AspNetCore.Routing.RouteCollection</Description>
|
|||
<NoWarn>$(NoWarn);CS1591</NoWarn>
|
||||
<GenerateDocumentationFile>true</GenerateDocumentationFile>
|
||||
<PackageTags>aspnetcore;routing</PackageTags>
|
||||
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
|
|
|
|||
|
|
@ -3,7 +3,6 @@
|
|||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Collections.Specialized;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.AspNetCore.Http;
|
||||
|
||||
|
|
@ -34,33 +33,12 @@ namespace Microsoft.AspNetCore.Routing.Matchers
|
|||
var current = 0;
|
||||
|
||||
var path = httpContext.Request.Path.Value;
|
||||
|
||||
// This section tokenizes the path by marking the sequence of slashes, and their
|
||||
// position in the string. The consuming code uses the sequence and the count of
|
||||
// slashes to deduce the length of each segment.
|
||||
//
|
||||
// If there is residue (text after last slash) then the length of the segment will
|
||||
// computed based on the string length.
|
||||
var buffer = stackalloc Segment[32];
|
||||
var segment = 0;
|
||||
var start = 1; // PathString guarantees a leading /
|
||||
var end = 0;
|
||||
var length = 0;
|
||||
while ((end = path.IndexOf('/', start)) >= 0 && segment < 32)
|
||||
var buffer = stackalloc PathSegment[32];
|
||||
var count = FastPathTokenizer.Tokenize(path, buffer, 32);
|
||||
|
||||
for (var i = 0; i < count; i++)
|
||||
{
|
||||
length = end - start;
|
||||
buffer[segment++] = new Segment() { Start = start, Length = length, };
|
||||
current = states[current].Transitions.GetDestination(path, start, length);
|
||||
|
||||
start = end + 1; // resume search after the current character
|
||||
}
|
||||
|
||||
// Residue
|
||||
length = path.Length - start;
|
||||
if (length > 0)
|
||||
{
|
||||
buffer[segment++] = new Segment() { Start = start, Length = length, };
|
||||
current = states[current].Transitions.GetDestination(path, start, length);
|
||||
current = states[current].Transitions.GetDestination(buffer, i, path);
|
||||
}
|
||||
|
||||
var matches = new List<(Endpoint, RouteValueDictionary)>();
|
||||
|
|
@ -98,12 +76,6 @@ namespace Microsoft.AspNetCore.Routing.Matchers
|
|||
return Task.CompletedTask;
|
||||
}
|
||||
|
||||
public struct Segment
|
||||
{
|
||||
public int Start;
|
||||
public int Length;
|
||||
}
|
||||
|
||||
public struct State
|
||||
{
|
||||
public bool IsAccepting;
|
||||
|
|
@ -119,7 +91,7 @@ namespace Microsoft.AspNetCore.Routing.Matchers
|
|||
|
||||
public abstract class JumpTable
|
||||
{
|
||||
public abstract int GetDestination(string text, int start, int length);
|
||||
public unsafe abstract int GetDestination(PathSegment* segments, int depth, string path);
|
||||
}
|
||||
|
||||
public class JumpTableBuilder
|
||||
|
|
@ -154,17 +126,18 @@ namespace Microsoft.AspNetCore.Routing.Matchers
|
|||
_entries = entries;
|
||||
}
|
||||
|
||||
public override int GetDestination(string text, int start, int length)
|
||||
public unsafe override int GetDestination(PathSegment* segments, int depth, string path)
|
||||
{
|
||||
for (var i = 0; i < _entries.Length; i++)
|
||||
{
|
||||
if (length == _entries[i].text.Length &&
|
||||
var segment = segments[depth];
|
||||
if (segment.Length == _entries[i].text.Length &&
|
||||
string.Compare(
|
||||
text,
|
||||
start,
|
||||
path,
|
||||
segment.Start,
|
||||
_entries[i].text,
|
||||
0,
|
||||
length,
|
||||
segment.Length,
|
||||
StringComparison.OrdinalIgnoreCase) == 0)
|
||||
{
|
||||
return _entries[i].destination;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,117 @@
|
|||
// Copyright (c) .NET Foundation. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using Xunit;
|
||||
|
||||
namespace Microsoft.AspNetCore.Routing.Matchers
|
||||
{
|
||||
public unsafe class FastPathTokenizerTest
{
    // Size of the scratch buffer handed to the tokenizer in every test.
    private const int Capacity = 32;

    [Fact] // Note: tokenizing a truly empty string is undefined.
    public void Tokenize_EmptyPath()
    {
        // Arrange
        PathSegment* buffer = stackalloc PathSegment[Capacity];

        // Act
        int actual = FastPathTokenizer.Tokenize("/", buffer, 1);

        // Assert
        Assert.Equal(0, actual);
    }

    [Fact]
    public void Tokenize_SingleSegment()
    {
        // Arrange
        PathSegment* buffer = stackalloc PathSegment[Capacity];

        // Act
        int actual = FastPathTokenizer.Tokenize("/abc", buffer, 1);

        // Assert
        Assert.Equal(1, actual);
        AssertSegment(1, 3, buffer[0]);
    }

    [Fact]
    public void Tokenize_WithSomeSegments()
    {
        // Arrange
        PathSegment* buffer = stackalloc PathSegment[Capacity];

        // Act
        int actual = FastPathTokenizer.Tokenize("/a/b/c", buffer, 3);

        // Assert
        Assert.Equal(3, actual);
        AssertSegment(1, 1, buffer[0]);
        AssertSegment(3, 1, buffer[1]);
        AssertSegment(5, 1, buffer[2]);
    }

    [Fact] // Empty trailing / is ignored
    public void Tokenize_WithSomeSegments_TrailingSlash()
    {
        // Arrange
        PathSegment* buffer = stackalloc PathSegment[Capacity];

        // Act
        int actual = FastPathTokenizer.Tokenize("/a/b/c/", buffer, 3);

        // Assert
        Assert.Equal(3, actual);
        AssertSegment(1, 1, buffer[0]);
        AssertSegment(3, 1, buffer[1]);
        AssertSegment(5, 1, buffer[2]);
    }

    [Fact]
    public void Tokenize_LongerSegments()
    {
        // Arrange
        PathSegment* buffer = stackalloc PathSegment[Capacity];

        // Act
        int actual = FastPathTokenizer.Tokenize("/aaa/bb/ccccc", buffer, 3);

        // Assert
        Assert.Equal(3, actual);
        AssertSegment(1, 3, buffer[0]);
        AssertSegment(5, 2, buffer[1]);
        AssertSegment(8, 5, buffer[2]);
    }

    [Fact]
    public void Tokenize_EmptySegments()
    {
        // Arrange
        PathSegment* buffer = stackalloc PathSegment[Capacity];

        // Act
        int actual = FastPathTokenizer.Tokenize("///c", buffer, 3);

        // Assert
        Assert.Equal(3, actual);
        AssertSegment(1, 0, buffer[0]);
        AssertSegment(2, 0, buffer[1]);
        AssertSegment(3, 1, buffer[2]);
    }

    [Fact]
    public void Tokenize_TooManySegments()
    {
        // Arrange
        PathSegment* buffer = stackalloc PathSegment[Capacity];

        // Act
        int actual = FastPathTokenizer.Tokenize("/a/b/c/d", buffer, 3);

        // Assert
        Assert.Equal(3, actual);
        AssertSegment(1, 1, buffer[0]);
        AssertSegment(3, 1, buffer[1]);
        AssertSegment(5, 1, buffer[2]);
    }

    // Compares a tokenized segment with the expected start index and length.
    private static void AssertSegment(int expectedStart, int expectedLength, PathSegment actual)
    {
        Assert.Equal(new PathSegment(expectedStart, expectedLength), actual);
    }
}
|
||||
}
|
||||
|
|
@ -39,28 +39,8 @@ namespace Microsoft.AspNetCore.Routing.Matchers
|
|||
var state = _state;
|
||||
|
||||
var path = httpContext.Request.Path.Value;
|
||||
|
||||
// This section tokenizes the path by marking the sequence of slashes, and their
|
||||
// position in the string. The consuming code uses the sequence and the count of
|
||||
// slashes to deduce the length of each segment.
|
||||
//
|
||||
// If there is residue (text after last slash) then the length of the segment will
|
||||
// computed based on the string length.
|
||||
var buffer = stackalloc Segment[32];
|
||||
var count = 0;
|
||||
var start = 1; // PathString guarantees a leading /
|
||||
var end = 0;
|
||||
while ((end = path.IndexOf('/', start)) >= 0 && count < 32)
|
||||
{
|
||||
buffer[count++] = new Segment() { Start = start, Length = end - start, };
|
||||
start = end + 1; // resume search after the current character
|
||||
}
|
||||
|
||||
// Residue
|
||||
if (start < path.Length)
|
||||
{
|
||||
buffer[count++] = new Segment() { Start = start, Length = path.Length - start, };
|
||||
}
|
||||
var buffer = stackalloc PathSegment[32];
|
||||
var count = FastPathTokenizer.Tokenize(path, buffer, 32);
|
||||
|
||||
var i = 0;
|
||||
var candidates = new List<Candidate>();
|
||||
|
|
@ -125,12 +105,6 @@ namespace Microsoft.AspNetCore.Routing.Matchers
|
|||
return Task.CompletedTask;
|
||||
}
|
||||
|
||||
public struct Segment
|
||||
{
|
||||
public int Start;
|
||||
public int Length;
|
||||
}
|
||||
|
||||
public struct Candidate
|
||||
{
|
||||
public Endpoint Endpoint;
|
||||
|
|
@ -173,7 +147,7 @@ namespace Microsoft.AspNetCore.Routing.Matchers
|
|||
|
||||
public abstract class JumpTable
|
||||
{
|
||||
public unsafe abstract int GetDestination(Segment* segments, int count, string path);
|
||||
public unsafe abstract int GetDestination(PathSegment* segments, int depth, string path);
|
||||
}
|
||||
|
||||
public class JumpTableBuilder
|
||||
|
|
@ -208,7 +182,7 @@ namespace Microsoft.AspNetCore.Routing.Matchers
|
|||
_entries = entries;
|
||||
}
|
||||
|
||||
public unsafe override int GetDestination(Segment* segments, int count, string path)
|
||||
public unsafe override int GetDestination(PathSegment* segments, int count, string path)
|
||||
{
|
||||
if (_depth == count)
|
||||
{
|
||||
|
|
|
|||
Loading…
Reference in New Issue