From 30fcfb65eaa61ed6bb566dabf9a337a761df034f Mon Sep 17 00:00:00 2001 From: Ryan Nowak Date: Sat, 16 Jun 2018 17:20:28 -0700 Subject: [PATCH] Productionize Path tokenization --- .../FastPathTokenizerBenchmarkBase.cs | 34 +++++ .../FastPathTokenizerEmptyBenchmark.cs | 32 +++++ .../FastPathTokenizerLargeBenchmark.cs | 36 ++++++ .../FastPathTokenizerPlaintextBenchmark.cs | 32 +++++ .../FastPathTokenizerSmallBenchmark.cs | 32 +++++ ...soft.AspNetCore.Routing.Performance.csproj | 3 +- .../Matchers/FastPathTokenizer.cs | 40 ++++++ .../Matchers/PathSegment.cs | 39 ++++++ .../Microsoft.AspNetCore.Routing.csproj | 1 + .../Matchers/DfaMatcher.cs | 51 ++------ .../Matchers/FastPathTokenizerTest.cs | 117 ++++++++++++++++++ .../Matchers/InstructionMatcher.cs | 34 +---- 12 files changed, 380 insertions(+), 71 deletions(-) create mode 100644 benchmarks/Microsoft.AspNetCore.Routing.Performance/Matchers/FastPathTokenizerBenchmarkBase.cs create mode 100644 benchmarks/Microsoft.AspNetCore.Routing.Performance/Matchers/FastPathTokenizerEmptyBenchmark.cs create mode 100644 benchmarks/Microsoft.AspNetCore.Routing.Performance/Matchers/FastPathTokenizerLargeBenchmark.cs create mode 100644 benchmarks/Microsoft.AspNetCore.Routing.Performance/Matchers/FastPathTokenizerPlaintextBenchmark.cs create mode 100644 benchmarks/Microsoft.AspNetCore.Routing.Performance/Matchers/FastPathTokenizerSmallBenchmark.cs create mode 100644 src/Microsoft.AspNetCore.Routing/Matchers/FastPathTokenizer.cs create mode 100644 src/Microsoft.AspNetCore.Routing/Matchers/PathSegment.cs create mode 100644 test/Microsoft.AspNetCore.Routing.Tests/Matchers/FastPathTokenizerTest.cs diff --git a/benchmarks/Microsoft.AspNetCore.Routing.Performance/Matchers/FastPathTokenizerBenchmarkBase.cs b/benchmarks/Microsoft.AspNetCore.Routing.Performance/Matchers/FastPathTokenizerBenchmarkBase.cs new file mode 100644 index 0000000000..8f27db6c60 --- /dev/null +++ 
b/benchmarks/Microsoft.AspNetCore.Routing.Performance/Matchers/FastPathTokenizerBenchmarkBase.cs @@ -0,0 +1,34 @@ +// Copyright (c) .NET Foundation. All rights reserved. +// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. + +namespace Microsoft.AspNetCore.Routing.Matchers +{ + public abstract class FastPathTokenizerBenchmarkBase + { + internal unsafe void NaiveBaseline(string path, PathSegment * segments, int maxCount) + { + int count = 0; + int start = 1; // Paths always start with a leading / + int end; + while ((end = path.IndexOf('/', start)) >= 0 && count < maxCount) + { + segments[count++] = new PathSegment(start, end - start); + start = end + 1; // resume search after the current character + } + + // Residue + var length = path.Length - start; + if (length > 0 && count < maxCount) + { + segments[count++] = new PathSegment(start, length); + } + } + + internal unsafe void MinimalBaseline(string path, PathSegment* segments, int maxCount) + { + var start = 1; + var length = path.Length - start; + segments[0] = new PathSegment(start, length); + } + } +} diff --git a/benchmarks/Microsoft.AspNetCore.Routing.Performance/Matchers/FastPathTokenizerEmptyBenchmark.cs b/benchmarks/Microsoft.AspNetCore.Routing.Performance/Matchers/FastPathTokenizerEmptyBenchmark.cs new file mode 100644 index 0000000000..f637e565eb --- /dev/null +++ b/benchmarks/Microsoft.AspNetCore.Routing.Performance/Matchers/FastPathTokenizerEmptyBenchmark.cs @@ -0,0 +1,32 @@ +// Copyright (c) .NET Foundation. All rights reserved. +// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. 
+ +using BenchmarkDotNet.Attributes; + +namespace Microsoft.AspNetCore.Routing.Matchers +{ + public class FastPathTokenizerEmptyBenchmark : FastPathTokenizerBenchmarkBase + { + private const int MaxCount = 32; + private static readonly string Input = "/"; + + // This is a super hardcoded implementation for comparison; we don't expect to do better. + [Benchmark(Baseline = true)] + public unsafe void Baseline() + { + var path = Input; + var segments = stackalloc PathSegment[MaxCount]; + + MinimalBaseline(path, segments, MaxCount); + } + + [Benchmark] + public unsafe void Implementation() + { + var path = Input; + var segments = stackalloc PathSegment[MaxCount]; + + FastPathTokenizer.Tokenize(path, segments, MaxCount); + } + } +} diff --git a/benchmarks/Microsoft.AspNetCore.Routing.Performance/Matchers/FastPathTokenizerLargeBenchmark.cs b/benchmarks/Microsoft.AspNetCore.Routing.Performance/Matchers/FastPathTokenizerLargeBenchmark.cs new file mode 100644 index 0000000000..3e423191f1 --- /dev/null +++ b/benchmarks/Microsoft.AspNetCore.Routing.Performance/Matchers/FastPathTokenizerLargeBenchmark.cs @@ -0,0 +1,36 @@ +// Copyright (c) .NET Foundation. All rights reserved. +// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. + +using BenchmarkDotNet.Attributes; + +namespace Microsoft.AspNetCore.Routing.Matchers +{ + public class FastPathTokenizerLargeBenchmark : FastPathTokenizerBenchmarkBase + { + private static readonly int MaxCount = 32; + private static readonly string Input = + "/heeeeeeeeeeyyyyyyyyyyy/this/is/a/string/with/lots/of/segments" + + "/hoooooooooooooooooooooooooooooooooow long/do you think it should be?/I think" + + "/like/32/segments/is /a/goood/number/dklfl/20303/dlflkf" + + "/Im/tired/of/thinking/of/more/things/to/so"; + + // This is a naive reference implementation. We expect to do better. 
+ [Benchmark(Baseline = true)] + public unsafe void Baseline() + { + var path = Input; + var segments = stackalloc PathSegment[MaxCount]; + + NaiveBaseline(path, segments, MaxCount); + } + + [Benchmark] + public unsafe void Implementation() + { + var path = Input; + var segments = stackalloc PathSegment[MaxCount]; + + FastPathTokenizer.Tokenize(path, segments, MaxCount); + } + } +} diff --git a/benchmarks/Microsoft.AspNetCore.Routing.Performance/Matchers/FastPathTokenizerPlaintextBenchmark.cs b/benchmarks/Microsoft.AspNetCore.Routing.Performance/Matchers/FastPathTokenizerPlaintextBenchmark.cs new file mode 100644 index 0000000000..e125150512 --- /dev/null +++ b/benchmarks/Microsoft.AspNetCore.Routing.Performance/Matchers/FastPathTokenizerPlaintextBenchmark.cs @@ -0,0 +1,32 @@ +// Copyright (c) .NET Foundation. All rights reserved. +// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. + +using BenchmarkDotNet.Attributes; + +namespace Microsoft.AspNetCore.Routing.Matchers +{ + public class FastPathTokenizerPlaintextBenchmark : FastPathTokenizerBenchmarkBase + { + private const int MaxCount = 32; + private static readonly string Input = "/plaintext"; + + // This is a super hardcoded implementation for comparison; we don't expect to do better. 
+ [Benchmark(Baseline = true)] + public unsafe void Baseline() + { + var path = Input; + var segments = stackalloc PathSegment[MaxCount]; + + MinimalBaseline(path, segments, MaxCount); + } + + [Benchmark] + public unsafe void Implementation() + { + var path = Input; + var segments = stackalloc PathSegment[MaxCount]; + + FastPathTokenizer.Tokenize(path, segments, MaxCount); + } + } +} diff --git a/benchmarks/Microsoft.AspNetCore.Routing.Performance/Matchers/FastPathTokenizerSmallBenchmark.cs b/benchmarks/Microsoft.AspNetCore.Routing.Performance/Matchers/FastPathTokenizerSmallBenchmark.cs new file mode 100644 index 0000000000..70d1f6717e --- /dev/null +++ b/benchmarks/Microsoft.AspNetCore.Routing.Performance/Matchers/FastPathTokenizerSmallBenchmark.cs @@ -0,0 +1,32 @@ +// Copyright (c) .NET Foundation. All rights reserved. +// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. + +using BenchmarkDotNet.Attributes; + +namespace Microsoft.AspNetCore.Routing.Matchers +{ + public class FastPathTokenizerSmallBenchmark : FastPathTokenizerBenchmarkBase + { + private const int MaxCount = 32; + private static readonly string Input = "/hello/world/cool"; + + // This is a naive reference implementation. We expect to do better. 
+ [Benchmark(Baseline = true)] + public unsafe void Baseline() + { + var path = Input; + var segments = stackalloc PathSegment[MaxCount]; + + NaiveBaseline(path, segments, MaxCount); + } + + [Benchmark] + public unsafe void Implementation() + { + var path = Input; + var segments = stackalloc PathSegment[MaxCount]; + + FastPathTokenizer.Tokenize(path, segments, MaxCount); + } + } +} diff --git a/benchmarks/Microsoft.AspNetCore.Routing.Performance/Microsoft.AspNetCore.Routing.Performance.csproj b/benchmarks/Microsoft.AspNetCore.Routing.Performance/Microsoft.AspNetCore.Routing.Performance.csproj index 323a0e58fe..8c807811b5 100644 --- a/benchmarks/Microsoft.AspNetCore.Routing.Performance/Microsoft.AspNetCore.Routing.Performance.csproj +++ b/benchmarks/Microsoft.AspNetCore.Routing.Performance/Microsoft.AspNetCore.Routing.Performance.csproj @@ -1,8 +1,7 @@  - netcoreapp2.0;net461 - netcoreapp2.0 + netcoreapp2.2 Exe true true diff --git a/src/Microsoft.AspNetCore.Routing/Matchers/FastPathTokenizer.cs b/src/Microsoft.AspNetCore.Routing/Matchers/FastPathTokenizer.cs new file mode 100644 index 0000000000..f245748e43 --- /dev/null +++ b/src/Microsoft.AspNetCore.Routing/Matchers/FastPathTokenizer.cs @@ -0,0 +1,40 @@ +// Copyright (c) .NET Foundation. All rights reserved. +// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. + +using System; + +namespace Microsoft.AspNetCore.Routing.Matchers +{ + // Low level implementation of our path tokenization algorithm. Alternative + // to PathTokenizer. + internal static class FastPathTokenizer + { + // This section tokenizes the path by recording, for each segment, the index where +// it starts and the length of the text between the surrounding slashes. + // + // If there is residue (text after the last slash) then the length of the segment will + // be computed based on the remaining string length. 
+ public static unsafe int Tokenize(string path, PathSegment* segments, int maxCount) + { + int count = 0; + int start = 1; // Paths always start with a leading / + int end; + var span = path.AsSpan(start); + while ((end = span.IndexOf('/')) >= 0 && count < maxCount) + { + segments[count++] = new PathSegment(start, end); + start += end + 1; // resume search after the current character + span = path.AsSpan(start); + } + + // Residue + var length = span.Length; + if (length > 0 && count < maxCount) + { + segments[count++] = new PathSegment(start, length); + } + + return count; + } + } +} diff --git a/src/Microsoft.AspNetCore.Routing/Matchers/PathSegment.cs b/src/Microsoft.AspNetCore.Routing/Matchers/PathSegment.cs new file mode 100644 index 0000000000..24ef150863 --- /dev/null +++ b/src/Microsoft.AspNetCore.Routing/Matchers/PathSegment.cs @@ -0,0 +1,39 @@ +// Copyright (c) .NET Foundation. All rights reserved. +// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. + +using System; + +namespace Microsoft.AspNetCore.Routing.Matchers +{ + internal readonly struct PathSegment : IEquatable + { + public readonly int Start; + public readonly int Length; + + public PathSegment(int start, int length) + { + Start = start; + Length = length; + } + + public override bool Equals(object obj) + { + return obj is PathSegment segment ? 
Equals(segment) : false; + } + + public bool Equals(PathSegment other) + { + return Start == other.Start && Length == other.Length; + } + + public override int GetHashCode() + { + return Start; + } + + public override string ToString() + { + return $"Segment({Start}:{Length})"; + } + } +} diff --git a/src/Microsoft.AspNetCore.Routing/Microsoft.AspNetCore.Routing.csproj b/src/Microsoft.AspNetCore.Routing/Microsoft.AspNetCore.Routing.csproj index f1e3d36a55..fa1ae2e1ac 100644 --- a/src/Microsoft.AspNetCore.Routing/Microsoft.AspNetCore.Routing.csproj +++ b/src/Microsoft.AspNetCore.Routing/Microsoft.AspNetCore.Routing.csproj @@ -8,6 +8,7 @@ Microsoft.AspNetCore.Routing.RouteCollection $(NoWarn);CS1591 true aspnetcore;routing + true diff --git a/test/Microsoft.AspNetCore.Routing.Tests/Matchers/DfaMatcher.cs b/test/Microsoft.AspNetCore.Routing.Tests/Matchers/DfaMatcher.cs index 39be615b9a..ce28cdd643 100644 --- a/test/Microsoft.AspNetCore.Routing.Tests/Matchers/DfaMatcher.cs +++ b/test/Microsoft.AspNetCore.Routing.Tests/Matchers/DfaMatcher.cs @@ -3,7 +3,6 @@ using System; using System.Collections.Generic; -using System.Collections.Specialized; using System.Threading.Tasks; using Microsoft.AspNetCore.Http; @@ -34,33 +33,12 @@ namespace Microsoft.AspNetCore.Routing.Matchers var current = 0; var path = httpContext.Request.Path.Value; - - // This section tokenizes the path by marking the sequence of slashes, and their - // position in the string. The consuming code uses the sequence and the count of - // slashes to deduce the length of each segment. - // - // If there is residue (text after last slash) then the length of the segment will - // computed based on the string length. 
- var buffer = stackalloc Segment[32]; - var segment = 0; - var start = 1; // PathString guarantees a leading / - var end = 0; - var length = 0; - while ((end = path.IndexOf('/', start)) >= 0 && segment < 32) + var buffer = stackalloc PathSegment[32]; + var count = FastPathTokenizer.Tokenize(path, buffer, 32); + + for (var i = 0; i < count; i++) { - length = end - start; - buffer[segment++] = new Segment() { Start = start, Length = length, }; - current = states[current].Transitions.GetDestination(path, start, length); - - start = end + 1; // resume search after the current character - } - - // Residue - length = path.Length - start; - if (length > 0) - { - buffer[segment++] = new Segment() { Start = start, Length = length, }; - current = states[current].Transitions.GetDestination(path, start, length); + current = states[current].Transitions.GetDestination(buffer, i, path); } var matches = new List<(Endpoint, RouteValueDictionary)>(); @@ -98,12 +76,6 @@ namespace Microsoft.AspNetCore.Routing.Matchers return Task.CompletedTask; } - public struct Segment - { - public int Start; - public int Length; - } - public struct State { public bool IsAccepting; @@ -119,7 +91,7 @@ namespace Microsoft.AspNetCore.Routing.Matchers public abstract class JumpTable { - public abstract int GetDestination(string text, int start, int length); + public unsafe abstract int GetDestination(PathSegment* segments, int depth, string path); } public class JumpTableBuilder @@ -154,17 +126,18 @@ namespace Microsoft.AspNetCore.Routing.Matchers _entries = entries; } - public override int GetDestination(string text, int start, int length) + public unsafe override int GetDestination(PathSegment* segments, int depth, string path) { for (var i = 0; i < _entries.Length; i++) { - if (length == _entries[i].text.Length && + var segment = segments[depth]; + if (segment.Length == _entries[i].text.Length && string.Compare( - text, - start, + path, + segment.Start, _entries[i].text, 0, - length, + 
segment.Length, StringComparison.OrdinalIgnoreCase) == 0) { return _entries[i].destination; diff --git a/test/Microsoft.AspNetCore.Routing.Tests/Matchers/FastPathTokenizerTest.cs b/test/Microsoft.AspNetCore.Routing.Tests/Matchers/FastPathTokenizerTest.cs new file mode 100644 index 0000000000..c13b72bd71 --- /dev/null +++ b/test/Microsoft.AspNetCore.Routing.Tests/Matchers/FastPathTokenizerTest.cs @@ -0,0 +1,117 @@ +// Copyright (c) .NET Foundation. All rights reserved. +// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. + +using Xunit; + +namespace Microsoft.AspNetCore.Routing.Matchers +{ + public unsafe class FastPathTokenizerTest + { + [Fact] // Note: tokenizing a truly empty string is undefined. + public void Tokenize_EmptyPath() + { + // Arrange + var segments = stackalloc PathSegment[32]; + + // Act + var count = FastPathTokenizer.Tokenize("/", segments, 1); + + // Assert + Assert.Equal(0, count); + } + + [Fact] + public void Tokenize_SingleSegment() + { + // Arrange + var segments = stackalloc PathSegment[32]; + + // Act + var count = FastPathTokenizer.Tokenize("/abc", segments, 1); + + // Assert + Assert.Equal(1, count); + Assert.Equal(new PathSegment(1, 3), segments[0]); + } + + [Fact] + public void Tokenize_WithSomeSegments() + { + // Arrange + var segments = stackalloc PathSegment[32]; + + // Act + var count = FastPathTokenizer.Tokenize("/a/b/c", segments, 3); + + // Assert + Assert.Equal(3, count); + Assert.Equal(new PathSegment(1, 1), segments[0]); + Assert.Equal(new PathSegment(3, 1), segments[1]); + Assert.Equal(new PathSegment(5, 1), segments[2]); + } + + [Fact] // Empty trailing / is ignored + public void Tokenize_WithSomeSegments_TrailingSlash() + { + // Arrange + var segments = stackalloc PathSegment[32]; + + // Act + var count = FastPathTokenizer.Tokenize("/a/b/c/", segments, 3); + + // Assert + Assert.Equal(3, count); + Assert.Equal(new PathSegment(1, 1), segments[0]); + 
Assert.Equal(new PathSegment(3, 1), segments[1]); + Assert.Equal(new PathSegment(5, 1), segments[2]); + } + + [Fact] + public void Tokenize_LongerSegments() + { + // Arrange + var segments = stackalloc PathSegment[32]; + + // Act + var count = FastPathTokenizer.Tokenize("/aaa/bb/ccccc", segments, 3); + + // Assert + Assert.Equal(3, count); + Assert.Equal(new PathSegment(1, 3), segments[0]); + Assert.Equal(new PathSegment(5, 2), segments[1]); + Assert.Equal(new PathSegment(8, 5), segments[2]); + } + + [Fact] + public void Tokenize_EmptySegments() + { + // Arrange + var segments = stackalloc PathSegment[32]; + + // Act + var count = FastPathTokenizer.Tokenize("///c", segments, 3); + + // Assert + Assert.Equal(3, count); + Assert.Equal(new PathSegment(1, 0), segments[0]); + Assert.Equal(new PathSegment(2, 0), segments[1]); + Assert.Equal(new PathSegment(3, 1), segments[2]); + } + + [Fact] + public void Tokenize_TooManySegments() + { + // Arrange + var segments = stackalloc PathSegment[32]; + + // Act + var count = FastPathTokenizer.Tokenize("/a/b/c/d", segments, 3); + + // Assert + Assert.Equal(3, count); + Assert.Equal(new PathSegment(1, 1), segments[0]); + Assert.Equal(new PathSegment(3, 1), segments[1]); + Assert.Equal(new PathSegment(5, 1), segments[2]); + } + } +} diff --git a/test/Microsoft.AspNetCore.Routing.Tests/Matchers/InstructionMatcher.cs b/test/Microsoft.AspNetCore.Routing.Tests/Matchers/InstructionMatcher.cs index 4920b8781a..cc4d9e6f8d 100644 --- a/test/Microsoft.AspNetCore.Routing.Tests/Matchers/InstructionMatcher.cs +++ b/test/Microsoft.AspNetCore.Routing.Tests/Matchers/InstructionMatcher.cs @@ -39,28 +39,8 @@ namespace Microsoft.AspNetCore.Routing.Matchers var state = _state; var path = httpContext.Request.Path.Value; - - // This section tokenizes the path by marking the sequence of slashes, and their - // position in the string. The consuming code uses the sequence and the count of - // slashes to deduce the length of each segment. 
- // - // If there is residue (text after last slash) then the length of the segment will - // computed based on the string length. - var buffer = stackalloc Segment[32]; - var count = 0; - var start = 1; // PathString guarantees a leading / - var end = 0; - while ((end = path.IndexOf('/', start)) >= 0 && count < 32) - { - buffer[count++] = new Segment() { Start = start, Length = end - start, }; - start = end + 1; // resume search after the current character - } - - // Residue - if (start < path.Length) - { - buffer[count++] = new Segment() { Start = start, Length = path.Length - start, }; - } + var buffer = stackalloc PathSegment[32]; + var count = FastPathTokenizer.Tokenize(path, buffer, 32); var i = 0; var candidates = new List(); @@ -125,12 +105,6 @@ namespace Microsoft.AspNetCore.Routing.Matchers return Task.CompletedTask; } - public struct Segment - { - public int Start; - public int Length; - } - public struct Candidate { public Endpoint Endpoint; @@ -173,7 +147,7 @@ namespace Microsoft.AspNetCore.Routing.Matchers public abstract class JumpTable { - public unsafe abstract int GetDestination(Segment* segments, int count, string path); + public unsafe abstract int GetDestination(PathSegment* segments, int depth, string path); } public class JumpTableBuilder @@ -208,7 +182,7 @@ namespace Microsoft.AspNetCore.Routing.Matchers _entries = entries; } - public unsafe override int GetDestination(Segment* segments, int count, string path) + public unsafe override int GetDestination(PathSegment* segments, int count, string path) { if (_depth == count) {