Span-based RemoveDotSegments (#1448)

This commit is contained in:
Cesar Blum Silveira 2017-03-17 14:42:13 -07:00 committed by GitHub
parent 9a4a810aa8
commit 2ef3804578
6 changed files with 341 additions and 126 deletions

View File

@ -1253,29 +1253,42 @@ namespace Microsoft.AspNetCore.Server.Kestrel.Internal.Http
try
{
// Read raw target before mutating memory.
rawTarget = target.GetAsciiStringNonNullCharacters();
if (pathEncoded)
{
// Read raw target before mutating memory.
rawTarget = target.GetAsciiStringNonNullCharacters();
// URI was encoded, unescape and then parse as UTF-8
var pathLength = UrlEncoder.Decode(path, path);
// Removing dot segments must be done after unescaping. From RFC 3986:
//
// URI producing applications should percent-encode data octets that
// correspond to characters in the reserved set unless these characters
// are specifically allowed by the URI scheme to represent data in that
// component. If a reserved character is found in a URI component and
// no delimiting role is known for that character, then it must be
// interpreted as representing the data octet corresponding to that
// character's encoding in US-ASCII.
//
// https://tools.ietf.org/html/rfc3986#section-2.2
pathLength = PathNormalizer.RemoveDotSegments(path.Slice(0, pathLength));
// URI was encoded, unescape and then parse as utf8
int pathLength = UrlEncoder.Decode(path, path);
requestUrlPath = GetUtf8String(path.Slice(0, pathLength));
}
else
{
// URI wasn't encoded, parse as ASCII
requestUrlPath = path.GetAsciiStringNonNullCharacters();
var pathLength = PathNormalizer.RemoveDotSegments(path);
if (query.Length == 0)
if (path.Length == pathLength && query.Length == 0)
{
// No need to allocate an extra string if the path didn't need
// decoding and there's no query string following it.
rawTarget = requestUrlPath;
// If no decoding was required, no dot segments were removed and
// there is no query, the request path is the same as the raw target
requestUrlPath = rawTarget;
}
else
{
rawTarget = target.GetAsciiStringNonNullCharacters();
requestUrlPath = path.Slice(0, pathLength).GetAsciiStringNonNullCharacters();
}
}
}
@ -1286,7 +1299,7 @@ namespace Microsoft.AspNetCore.Server.Kestrel.Internal.Http
QueryString = query.GetAsciiStringNonNullCharacters();
RawTarget = rawTarget;
Path = PathNormalizer.RemoveDotSegments(requestUrlPath);
Path = requestUrlPath;
}
private void OnAuthorityFormTarget(HttpMethod method, Span<byte> target)
@ -1360,7 +1373,7 @@ namespace Microsoft.AspNetCore.Server.Kestrel.Internal.Http
RejectRequestTarget(target);
}
Path = PathNormalizer.RemoveDotSegments(uri.LocalPath);
Path = uri.LocalPath;
// don't use uri.Query because we need the unescaped version
QueryString = query.GetAsciiStringNonNullCharacters();
}

View File

@ -1,117 +1,204 @@
// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
using System.Buffers;
using System;
using System.Diagnostics;
namespace Microsoft.AspNetCore.Server.Kestrel.Internal.Http
{
public static class PathNormalizer
{
public static string RemoveDotSegments(string path)
private const byte ByteSlash = (byte)'/';
private const byte ByteDot = (byte)'.';
// In-place implementation of the algorithm from https://tools.ietf.org/html/rfc3986#section-5.2.4
public static unsafe int RemoveDotSegments(Span<byte> input)
{
if (ContainsDotSegments(path))
fixed (byte* start = &input.DangerousGetPinnableReference())
{
var normalizedChars = ArrayPool<char>.Shared.Rent(path.Length);
var normalizedIndex = normalizedChars.Length;
var pathIndex = path.Length - 1;
var skipSegments = 0;
while (pathIndex >= 0)
{
if (pathIndex >= 2 && path[pathIndex] == '.' && path[pathIndex - 1] == '.' && path[pathIndex - 2] == '/')
{
if (normalizedIndex == normalizedChars.Length || normalizedChars[normalizedIndex] != '/')
{
normalizedChars[--normalizedIndex] = '/';
}
skipSegments++;
pathIndex -= 3;
}
else if (pathIndex >= 1 && path[pathIndex] == '.' && path[pathIndex - 1] == '/')
{
pathIndex -= 2;
}
else
{
while (pathIndex >= 0)
{
var lastChar = path[pathIndex];
if (skipSegments == 0)
{
normalizedChars[--normalizedIndex] = lastChar;
}
pathIndex--;
if (lastChar == '/')
{
break;
}
}
if (skipSegments > 0)
{
skipSegments--;
}
}
}
path = new string(normalizedChars, normalizedIndex, normalizedChars.Length - normalizedIndex);
ArrayPool<char>.Shared.Return(normalizedChars);
var end = start + input.Length;
return RemoveDotSegments(start, end);
}
return path;
}
private unsafe static bool ContainsDotSegments(string path)
public static unsafe int RemoveDotSegments(byte* start, byte* end)
{
fixed (char* ptr = path)
if (!ContainsDotSegments(start, end))
{
char* end = ptr + path.Length;
return (int)(end - start);
}
for (char* p = ptr; p < end; p++)
var src = start;
var dst = start;
while (src < end)
{
var ch1 = *src;
Debug.Assert(ch1 == '/', "Path segment must always start with a '/'");
byte ch2, ch3, ch4;
switch (end - src)
{
if (*p == '/')
{
p++;
}
case 1:
break;
case 2:
ch2 = *(src + 1);
if (p == end)
{
return false;
}
if (*p == '.')
{
p++;
if (p == end)
if (ch2 == ByteDot)
{
return true;
// B. if the input buffer begins with a prefix of "/./" or "/.",
// where "." is a complete path segment, then replace that
// prefix with "/" in the input buffer; otherwise,
src += 1;
*src = ByteSlash;
continue;
}
if (*p == '.')
{
p++;
break;
case 3:
ch2 = *(src + 1);
ch3 = *(src + 2);
if (p == end)
if (ch2 == ByteDot && ch3 == ByteDot)
{
// C. if the input buffer begins with a prefix of "/../" or "/..",
// where ".." is a complete path segment, then replace that
// prefix with "/" in the input buffer and remove the last
// segment and its preceding "/" (if any) from the output
// buffer; otherwise,
src += 2;
*src = ByteSlash;
if (dst > start)
{
return true;
do
{
dst--;
} while (dst > start && *dst != ByteSlash);
}
if (*p == '/')
{
return true;
}
continue;
}
else if (*p == '/')
else if (ch2 == ByteDot && ch3 == ByteSlash)
{
return true;
// B. if the input buffer begins with a prefix of "/./" or "/.",
// where "." is a complete path segment, then replace that
// prefix with "/" in the input buffer; otherwise,
src += 2;
continue;
}
}
break;
default:
ch2 = *(src + 1);
ch3 = *(src + 2);
ch4 = *(src + 3);
if (ch2 == ByteDot && ch3 == ByteDot && ch4 == ByteSlash)
{
// C. if the input buffer begins with a prefix of "/../" or "/..",
// where ".." is a complete path segment, then replace that
// prefix with "/" in the input buffer and remove the last
// segment and its preceding "/" (if any) from the output
// buffer; otherwise,
src += 3;
if (dst > start)
{
do
{
dst--;
} while (dst > start && *dst != ByteSlash);
}
continue;
}
else if (ch2 == ByteDot && ch3 == ByteSlash)
{
// B. if the input buffer begins with a prefix of "/./" or "/.",
// where "." is a complete path segment, then replace that
// prefix with "/" in the input buffer; otherwise,
src += 2;
continue;
}
break;
}
// E. move the first path segment in the input buffer to the end of
// the output buffer, including the initial "/" character (if
// any) and any subsequent characters up to, but not including,
// the next "/" character or the end of the input buffer.
do
{
*dst++ = ch1;
ch1 = *++src;
} while (src < end && ch1 != ByteSlash);
}
if (dst == start)
{
*dst++ = ByteSlash;
}
return (int)(dst - start);
}
public static unsafe bool ContainsDotSegments(byte* start, byte* end)
{
var src = start;
var dst = start;
while (src < end)
{
var ch1 = *src;
Debug.Assert(ch1 == '/', "Path segment must always start with a '/'");
byte ch2, ch3, ch4;
switch (end - src)
{
case 1:
break;
case 2:
ch2 = *(src + 1);
if (ch2 == ByteDot)
{
return true;
}
break;
case 3:
ch2 = *(src + 1);
ch3 = *(src + 2);
if ((ch2 == ByteDot && ch3 == ByteDot) ||
(ch2 == ByteDot && ch3 == ByteSlash))
{
return true;
}
break;
default:
ch2 = *(src + 1);
ch3 = *(src + 2);
ch4 = *(src + 3);
if ((ch2 == ByteDot && ch3 == ByteDot && ch4 == ByteSlash) ||
(ch2 == ByteDot && ch3 == ByteSlash))
{
return true;
}
break;
}
do
{
ch1 = *++src;
} while (src < end && ch1 != ByteSlash);
}
return false;

View File

@ -0,0 +1,60 @@
// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
using System;
using System.Text;
using BenchmarkDotNet.Attributes;
using Microsoft.AspNetCore.Server.Kestrel.Internal.Http;
namespace Microsoft.AspNetCore.Server.Kestrel.Performance
{
    /// <summary>
    /// Measures <c>PathNormalizer.RemoveDotSegments</c> on three ASCII request
    /// paths: one without dot segments (the baseline), one with "/./" segments
    /// and one with "/../" segments.
    /// </summary>
    /// <remarks>
    /// Every benchmark first copies the pristine bytes into its own working
    /// buffer and then passes that working buffer to the normalizer, so the
    /// pristine copy is never handed to <c>RemoveDotSegments</c>.
    /// </remarks>
    [Config(typeof(CoreConfig))]
    public class DotSegmentRemovalBenchmark
    {
        // Request paths under test.
        private const string PlainPath = "/long/request/target/for/benchmarking/what/else/can/we/put/here";
        private const string SingleDotPath = "/long/./request/./target/./for/./benchmarking/./what/./else/./can/./we/./put/./here";
        private const string DoubleDotPath = "/long/../request/../target/../for/../benchmarking/../what/../else/../can/../we/../put/../here";

        // Pristine ASCII encodings of the paths; only ever read from.
        private readonly byte[] _plainPristine = Encoding.ASCII.GetBytes(PlainPath);
        private readonly byte[] _singleDotPristine = Encoding.ASCII.GetBytes(SingleDotPath);
        private readonly byte[] _doubleDotPristine = Encoding.ASCII.GetBytes(DoubleDotPath);

        // Working buffers, one per benchmark, sized to match the corresponding path.
        private readonly byte[] _plainWork = new byte[PlainPath.Length];
        private readonly byte[] _singleDotWork = new byte[SingleDotPath.Length];
        private readonly byte[] _doubleDotWork = new byte[DoubleDotPath.Length];

        /// <summary>Baseline: a path that contains no dot segments.</summary>
        /// <returns>The value returned by <c>PathNormalizer.RemoveDotSegments</c>.</returns>
        [Benchmark(Baseline = true)]
        public unsafe int NoDotSegments()
        {
            _plainPristine.CopyTo(_plainWork);

            fixed (byte* first = _plainWork)
            {
                byte* pastLast = first + _plainWork.Length;
                return PathNormalizer.RemoveDotSegments(first, pastLast);
            }
        }

        /// <summary>A path in which every other segment is ".".</summary>
        /// <returns>The value returned by <c>PathNormalizer.RemoveDotSegments</c>.</returns>
        [Benchmark]
        public unsafe int SingleDotSegments()
        {
            _singleDotPristine.CopyTo(_singleDotWork);

            fixed (byte* first = _singleDotWork)
            {
                byte* pastLast = first + _singleDotWork.Length;
                return PathNormalizer.RemoveDotSegments(first, pastLast);
            }
        }

        /// <summary>A path in which every other segment is "..".</summary>
        /// <returns>The value returned by <c>PathNormalizer.RemoveDotSegments</c>.</returns>
        [Benchmark]
        public unsafe int DoubleDotSegments()
        {
            _doubleDotPristine.CopyTo(_doubleDotWork);

            fixed (byte* first = _doubleDotWork)
            {
                byte* pastLast = first + _doubleDotWork.Length;
                return PathNormalizer.RemoveDotSegments(first, pastLast);
            }
        }
    }
}

View File

@ -246,7 +246,7 @@ namespace Microsoft.AspNetCore.Server.KestrelTests
}
[Theory]
[MemberData(nameof(ValidRequestLineData))]
[MemberData(nameof(RequestLineValidData))]
public async Task TakeStartLineSetsFrameProperties(
string requestLine,
string expectedMethod,
@ -271,6 +271,27 @@ namespace Microsoft.AspNetCore.Server.KestrelTests
Assert.Equal(expectedHttpVersion, _frame.HttpVersion);
}
/// <summary>
/// Feeds a request line to <c>TakeStartLine</c> and checks the resulting
/// <c>RawTarget</c>, <c>Path</c> and <c>QueryString</c> against the expected
/// values supplied by <c>RequestLineDotSegmentData</c>.
/// </summary>
[Theory]
[MemberData(nameof(RequestLineDotSegmentData))]
public async Task TakeStartLineRemovesDotSegmentsFromTarget(
    string requestLine,
    string expectedRawTarget,
    string expectedDecodedPath,
    string expectedQueryString)
{
    // Arrange: push the ASCII-encoded request line through the input pipe.
    await _input.Writer.WriteAsync(Encoding.ASCII.GetBytes(requestLine));
    var readResult = await _input.Reader.ReadAsync();

    // Act: parse the start line, then report what was consumed/examined back to the reader.
    var parsed = _frame.TakeStartLine(readResult.Buffer, out _consumed, out _examined);
    _input.Reader.Advance(_consumed, _examined);

    // Assert: parsing succeeded and the frame exposes the expected target parts.
    Assert.True(parsed);
    Assert.Equal(expectedRawTarget, _frame.RawTarget);
    Assert.Equal(expectedDecodedPath, _frame.Path);
    Assert.Equal(expectedQueryString, _frame.QueryString);
}
[Fact]
public async Task ParseRequestStartsRequestHeadersTimeoutOnFirstByteAvailable()
{
@ -595,7 +616,9 @@ namespace Microsoft.AspNetCore.Server.KestrelTests
}
}
public static IEnumerable<object> ValidRequestLineData => HttpParsingData.RequestLineValidData;
public static IEnumerable<object> RequestLineValidData => HttpParsingData.RequestLineValidData;
public static IEnumerable<object> RequestLineDotSegmentData => HttpParsingData.RequestLineDotSegmentData;
public static TheoryData<string> TargetWithEncodedNullCharData
{

View File

@ -15,41 +15,51 @@ namespace Microsoft.AspNetCore.Server.KestrelTests
[InlineData("/a/", "/a/")]
[InlineData("/a/b", "/a/b")]
[InlineData("/a/b/", "/a/b/")]
[InlineData("/a", "/./a")]
[InlineData("/a", "/././a")]
[InlineData("/a", "/../a")]
[InlineData("/a", "/../../a")]
[InlineData("/a/b", "/a/./b")]
[InlineData("/b", "/a/../b")]
[InlineData("/a/", "/a/./")]
[InlineData("/a", "/a/.")]
[InlineData("/", "/a/../b/../")]
[InlineData("/", "/a/../b/..")]
[InlineData("/b", "/a/../../b")]
[InlineData("/b/", "/a/../../b/")]
[InlineData("/b", "/a/.././../b")]
[InlineData("/b/", "/a/.././../b/")]
[InlineData("/a/d", "/a/b/c/./../../d")]
[InlineData("/a/d", "/./a/b/c/./../../d")]
[InlineData("/a/d", "/../a/b/c/./../../d")]
[InlineData("/a/d", "/./../a/b/c/./../../d")]
[InlineData("/a/d", "/.././a/b/c/./../../d")]
[InlineData("/./a", "/a")]
[InlineData("/././a", "/a")]
[InlineData("/../a", "/a")]
[InlineData("/../../a", "/a")]
[InlineData("/a/./b", "/a/b")]
[InlineData("/a/../b", "/b")]
[InlineData("/a/./", "/a/")]
[InlineData("/a/.", "/a/")]
[InlineData("/a/../", "/")]
[InlineData("/a/..", "/")]
[InlineData("/a/../b/../", "/")]
[InlineData("/a/../b/..", "/")]
[InlineData("/a/../../b", "/b")]
[InlineData("/a/../../b/", "/b/")]
[InlineData("/a/.././../b", "/b")]
[InlineData("/a/.././../b/", "/b/")]
[InlineData("/a/b/c/./../../d", "/a/d")]
[InlineData("/./a/b/c/./../../d", "/a/d")]
[InlineData("/../a/b/c/./../../d", "/a/d")]
[InlineData("/./../a/b/c/./../../d", "/a/d")]
[InlineData("/.././a/b/c/./../../d", "/a/d")]
[InlineData("/.a", "/.a")]
[InlineData("/..a", "/..a")]
[InlineData("/...", "/...")]
[InlineData("/a/.../b", "/a/.../b")]
[InlineData("/b", "/a/../.../../b")]
[InlineData("/a/../.../../b", "/b")]
[InlineData("/a/.b", "/a/.b")]
[InlineData("/a/..b", "/a/..b")]
[InlineData("/a/b.", "/a/b.")]
[InlineData("/a/b..", "/a/b..")]
[InlineData("a/b", "a/b")]
[InlineData("a/c", "a/b/../c")]
[InlineData("*", "*")]
public void RemovesDotSegments(string expected, string input)
[InlineData("/longlong/../short", "/short")]
[InlineData("/short/../longlong", "/longlong")]
[InlineData("/longlong/../short/..", "/")]
[InlineData("/short/../longlong/..", "/")]
[InlineData("/longlong/../short/../", "/")]
[InlineData("/short/../longlong/../", "/")]
[InlineData("/", "/")]
[InlineData("/no/segments", "/no/segments")]
[InlineData("/no/segments/", "/no/segments/")]
public void RemovesDotSegments(string input, string expected)
{
var result = PathNormalizer.RemoveDotSegments(input);
Assert.Equal(expected, result);
var data = Encoding.ASCII.GetBytes(input);
var length = PathNormalizer.RemoveDotSegments(new Span<byte>(data));
Assert.True(length >= 1);
Assert.Equal(expected, Encoding.ASCII.GetString(data, 0, length));
}
}
}

View File

@ -93,6 +93,28 @@ namespace Microsoft.AspNetCore.Testing
}
}
/// <summary>
/// Request lines whose targets contain dot segments, either literal ("..")
/// or percent-encoded ("%2E%2E"). Each row holds, in order: the request line,
/// the expected raw target, the expected path and the expected query string.
/// </summary>
public static IEnumerable<string[]> RequestLineDotSegmentData => new[]
{
// Targets that start with "/": dot segments in the path are expected to be
// removed from the path, while the raw target keeps the text as sent.
new[] { "GET /a/../b HTTP/1.1\r\n", "/a/../b", "/b", "" },
new[] { "GET /%61/../%62 HTTP/1.1\r\n", "/%61/../%62", "/b", "" },
new[] { "GET /a/%2E%2E/b HTTP/1.1\r\n", "/a/%2E%2E/b", "/b", "" },
new[] { "GET /%61/%2E%2E/%62 HTTP/1.1\r\n", "/%61/%2E%2E/%62", "/b", "" },
// Dot segments that appear only after the '?' are expected to be left untouched.
new[] { "GET /a?p=/a/../b HTTP/1.1\r\n", "/a?p=/a/../b", "/a", "?p=/a/../b" },
new[] { "GET /a?p=/a/%2E%2E/b HTTP/1.1\r\n", "/a?p=/a/%2E%2E/b", "/a", "?p=/a/%2E%2E/b" },
// Targets that include scheme and host: same path/query expectations as above;
// the raw target keeps the "http://example.com" prefix.
new[] { "GET http://example.com/a/../b HTTP/1.1\r\n", "http://example.com/a/../b", "/b", "" },
new[] { "GET http://example.com/%61/../%62 HTTP/1.1\r\n", "http://example.com/%61/../%62", "/b", "" },
new[] { "GET http://example.com/a/%2E%2E/b HTTP/1.1\r\n", "http://example.com/a/%2E%2E/b", "/b", "" },
new[] { "GET http://example.com/%61/%2E%2E/%62 HTTP/1.1\r\n", "http://example.com/%61/%2E%2E/%62", "/b", "" },
new[] { "GET http://example.com/a?p=/a/../b HTTP/1.1\r\n", "http://example.com/a?p=/a/../b", "/a", "?p=/a/../b" },
new[] { "GET http://example.com/a?p=/a/%2E%2E/b HTTP/1.1\r\n", "http://example.com/a?p=/a/%2E%2E/b", "/a", "?p=/a/%2E%2E/b" },
// Host followed directly by a query string (no path): the expected path is "/".
new[] { "GET http://example.com?p=/a/../b HTTP/1.1\r\n", "http://example.com?p=/a/../b", "/", "?p=/a/../b" },
new[] { "GET http://example.com?p=/a/%2E%2E/b HTTP/1.1\r\n", "http://example.com?p=/a/%2E%2E/b", "/", "?p=/a/%2E%2E/b" },
// Asterisk-form and authority-form should be unaffected and cause no issues
new[] { "OPTIONS * HTTP/1.1\r\n", "*", "", "" },
new[] { "CONNECT www.example.com HTTP/1.1\r\n", "www.example.com", "", "" },
};
public static IEnumerable<string> RequestLineIncompleteData => new[]
{
"G",