diff --git a/src/Microsoft.AspNet.Server.Kestrel/Http/Frame.cs b/src/Microsoft.AspNet.Server.Kestrel/Http/Frame.cs
index 16fa98eae7..4b889c710e 100644
--- a/src/Microsoft.AspNet.Server.Kestrel/Http/Frame.cs
+++ b/src/Microsoft.AspNet.Server.Kestrel/Http/Frame.cs
@@ -677,7 +677,7 @@ namespace Microsoft.AspNet.Server.Kestrel.Http
                 {
                     return false;
                 }
-                var method = begin.GetString(scan);
+                var method = begin.GetAsciiString(scan);

                 scan.Take();
                 begin = scan;
@@ -701,7 +701,7 @@ namespace Microsoft.AspNet.Server.Kestrel.Http
                     {
                         return false;
                     }
-                    queryString = begin.GetString(scan);
+                    queryString = begin.GetAsciiString(scan);
                 }

                 scan.Take();
@@ -710,7 +710,7 @@ namespace Microsoft.AspNet.Server.Kestrel.Http
                 {
                     return false;
                 }
-                var httpVersion = begin.GetString(scan);
+                var httpVersion = begin.GetAsciiString(scan);

                 scan.Take();
                 if (scan.Take() != '\n')
@@ -718,12 +718,21 @@ namespace Microsoft.AspNet.Server.Kestrel.Http
                     return false;
                 }

+                // URIs are always encoded/escaped to ASCII https://tools.ietf.org/html/rfc3986#page-11
+                // Multibyte Internationalized Resource Identifiers (IRIs) are first converted to utf8;
+                // then encoded/escaped to ASCII https://www.ietf.org/rfc/rfc3987.txt "Mapping of IRIs to URIs"
+                string requestUrlPath;
                 if (needDecode)
                 {
+                    // URI was encoded, unescape and then parse as utf8
                     pathEnd = UrlPathDecoder.Unescape(pathBegin, pathEnd);
+                    requestUrlPath = pathBegin.GetUtf8String(pathEnd);
+                }
+                else
+                {
+                    // URI wasn't encoded, parse as ASCII
+                    requestUrlPath = pathBegin.GetAsciiString(pathEnd);
                 }
-
-                var requestUrlPath = pathBegin.GetString(pathEnd);

                 consumed = scan;
                 Method = method;
@@ -739,11 +748,6 @@ namespace Microsoft.AspNet.Server.Kestrel.Http
             }
         }

-        static string GetString(ArraySegment<byte> range, int startIndex, int endIndex)
-        {
-            return Encoding.UTF8.GetString(range.Array, range.Offset + startIndex, endIndex - startIndex);
-        }
-
         public static bool TakeMessageHeaders(SocketInput input, FrameRequestHeaders requestHeaders)
         {
             var scan = input.ConsumingStart();
@@ -835,7 +839,7 @@ namespace Microsoft.AspNet.Server.Kestrel.Http
                         }

                         var name = beginName.GetArraySegment(endName);
-                        var value = beginValue.GetString(endValue);
+                        var value = beginValue.GetAsciiString(endValue);
                         if (wrapping)
                         {
                             value = value.Replace("\r\n", " ");
diff --git a/src/Microsoft.AspNet.Server.Kestrel/Infrastructure/MemoryPoolIterator2.cs b/src/Microsoft.AspNet.Server.Kestrel/Infrastructure/MemoryPoolIterator2.cs
index ddb12bff7f..7bf514707e 100644
--- a/src/Microsoft.AspNet.Server.Kestrel/Infrastructure/MemoryPoolIterator2.cs
+++ b/src/Microsoft.AspNet.Server.Kestrel/Infrastructure/MemoryPoolIterator2.cs
@@ -4,7 +4,6 @@
 using System;
 using System.Linq;
 using System.Numerics;
-using System.Text;

 namespace Microsoft.AspNet.Server.Kestrel.Infrastructure
 {
@@ -22,8 +21,6 @@ namespace Microsoft.AspNet.Server.Kestrel.Infrastructure
         /// </summary>
         private static Vector<byte> _dotIndex = new Vector<byte>(Enumerable.Range(0, Vector<byte>.Count).Select(x => (byte)-x).ToArray());

-        private static Encoding _utf8 = Encoding.UTF8;
-
         private MemoryPoolBlock2 _block;
         private int _index;

@@ -488,101 +485,6 @@ namespace Microsoft.AspNet.Server.Kestrel.Infrastructure
             }
         }

-        public string GetString(MemoryPoolIterator2 end)
-        {
-            if (IsDefault || end.IsDefault)
-            {
-                return default(string);
-            }
-            if (end._block == _block)
-            {
-                return _utf8.GetString(_block.Array, _index, end._index - _index);
-            }
-
-            var decoder = _utf8.GetDecoder();
-
-            var length = GetLength(end);
-            var charLength = length * 2;
-            var chars = new char[charLength];
-            var charIndex = 0;
-
-            var block = _block;
-            var index = _index;
-            var remaining = length;
-            while (true)
-            {
-                int bytesUsed;
-                int charsUsed;
-                bool completed;
-                var following = block.End - index;
-                if (remaining <= following)
-                {
-                    decoder.Convert(
-                        block.Array,
-                        index,
-                        remaining,
-                        chars,
-                        charIndex,
-                        charLength - charIndex,
-                        true,
-                        out bytesUsed,
-                        out charsUsed,
-                        out completed);
-                    return new string(chars, 0, charIndex + charsUsed);
-                }
-                else if (block.Next == null)
-                {
-                    decoder.Convert(
-                        block.Array,
-                        index,
-                        following,
-                        chars,
-                        charIndex,
-                        charLength - charIndex,
-                        true,
-                        out bytesUsed,
-                        out charsUsed,
-                        out completed);
-                    return new string(chars, 0, charIndex + charsUsed);
-                }
-                else
-                {
-                    decoder.Convert(
-                        block.Array,
-                        index,
-                        following,
-                        chars,
-                        charIndex,
-                        charLength - charIndex,
-                        false,
-                        out bytesUsed,
-                        out charsUsed,
-                        out completed);
-                    charIndex += charsUsed;
-                    remaining -= following;
-                    block = block.Next;
-                    index = block.Start;
-                }
-            }
-        }
-
-        public ArraySegment<byte> GetArraySegment(MemoryPoolIterator2 end)
-        {
-            if (IsDefault || end.IsDefault)
-            {
-                return default(ArraySegment<byte>);
-            }
-            if (end._block == _block)
-            {
-                return new ArraySegment<byte>(_block.Array, _index, end._index - _index);
-            }
-
-            var length = GetLength(end);
-            var array = new byte[length];
-            CopyTo(array, 0, length, out length);
-            return new ArraySegment<byte>(array, 0, length);
-        }
-
         public MemoryPoolIterator2 CopyTo(byte[] array, int offset, int count, out int actual)
         {
             if (IsDefault)
diff --git a/src/Microsoft.AspNet.Server.Kestrel/Infrastructure/MemoryPoolIterator2Extenstions.cs b/src/Microsoft.AspNet.Server.Kestrel/Infrastructure/MemoryPoolIterator2Extenstions.cs
new file mode 100644
index 0000000000..d46eee028f
--- /dev/null
+++ b/src/Microsoft.AspNet.Server.Kestrel/Infrastructure/MemoryPoolIterator2Extenstions.cs
@@ -0,0 +1,234 @@
+// Copyright (c) .NET Foundation. All rights reserved.
+// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
+
+using System;
+using System.Text;
+
+namespace Microsoft.AspNet.Server.Kestrel.Infrastructure
+{
+    /// <summary>
+    /// String and array extraction helpers for the byte range between two <see cref="MemoryPoolIterator2"/> positions.
+    /// </summary>
+    public static class MemoryPoolIterator2Extenstions
+    {
+        // Largest character count decoded through a stackalloc buffer; longer ranges use a heap buffer.
+        // The limit is compared against a char count, so the stack buffer can occupy twice this many bytes.
+        private const int _maxStackAllocBytes = 16384;
+
+        private static Encoding _utf8 = Encoding.UTF8;
+
+        // Single-array variant: widens the bytes into a stack buffer and builds the string from it.
+        private static unsafe string GetAsciiStringStack(byte[] input, int inputOffset, int length)
+        {
+            // avoid declaring other local vars, or doing work with stackalloc
+            // to prevent the .locals init cil flag , see: https://github.com/dotnet/coreclr/issues/1279
+            char* output = stackalloc char[length];
+
+            return GetAsciiStringImplementation(output, input, inputOffset, length);
+        }
+
+        // Copies length bytes of input, starting at inputOffset, into output as chars of the same numeric value.
+        private static unsafe string GetAsciiStringImplementation(char* output, byte[] input, int inputOffset, int length)
+        {
+            for (var i = 0; i < length; i++)
+            {
+                output[i] = (char)input[inputOffset + i];
+            }
+
+            return new string(output, 0, length);
+        }
+
+        // Block-chain variant backed by a stack buffer; GetAsciiString only calls it within the stack limit.
+        private static unsafe string GetAsciiStringStack(MemoryPoolBlock2 start, MemoryPoolIterator2 end, int inputOffset, int length)
+        {
+            // avoid declaring other local vars, or doing work with stackalloc
+            // to prevent the .locals init cil flag , see: https://github.com/dotnet/coreclr/issues/1279
+            char* output = stackalloc char[length];
+
+            return GetAsciiStringImplementation(output, start, end, inputOffset, length);
+        }
+
+        // Block-chain variant backed by a heap buffer, used for ranges above the stack limit.
+        private unsafe static string GetAsciiStringHeap(MemoryPoolBlock2 start, MemoryPoolIterator2 end, int inputOffset, int length)
+        {
+            var buffer = new char[length];
+
+            fixed (char* output = buffer)
+            {
+                return GetAsciiStringImplementation(output, start, end, inputOffset, length);
+            }
+        }
+
+        // Walks the blocks from start (beginning at inputOffset) up to end, copying each byte into output as a char.
+        // Assumes end is reachable from start through Next and that length is the byte distance between them.
+        private static unsafe string GetAsciiStringImplementation(char* output, MemoryPoolBlock2 start, MemoryPoolIterator2 end, int inputOffset, int length)
+        {
+            var outputOffset = 0;
+            var block = start;
+            var remaining = length;
+
+            var endBlock = end.Block;
+            var endIndex = end.Index;
+
+            while (true)
+            {
+                int following = (block != endBlock ? block.End : endIndex) - inputOffset;
+
+                if (following > 0)
+                {
+                    var input = block.Array;
+                    for (var i = 0; i < following; i++)
+                    {
+                        output[i + outputOffset] = (char)input[i + inputOffset];
+                    }
+
+                    remaining -= following;
+                    outputOffset += following;
+                }
+
+                if (remaining == 0)
+                {
+                    return new string(output, 0, length);
+                }
+
+                block = block.Next;
+                inputOffset = block.Start;
+            }
+        }
+
+        /// <summary>
+        /// Returns the bytes between <paramref name="start"/> and <paramref name="end"/> as a string,
+        /// mapping each byte to the char with the same numeric value.
+        /// </summary>
+        /// <returns>The resulting string, or null when either iterator is default.</returns>
+        public static string GetAsciiString(this MemoryPoolIterator2 start, MemoryPoolIterator2 end)
+        {
+            if (start.IsDefault || end.IsDefault)
+            {
+                return default(string);
+            }
+
+            var length = start.GetLength(end);
+
+            // Bytes out of the range of ascii are treated as "opaque data"
+            // and kept in string as a char value that casts to same input byte value
+            // https://tools.ietf.org/html/rfc7230#section-3.2.4
+            // Ranges above the stack limit always take the heap path, which also handles a single block.
+            if (length > _maxStackAllocBytes)
+            {
+                return GetAsciiStringHeap(start.Block, end, start.Index, length);
+            }
+
+            if (end.Block == start.Block)
+            {
+                return GetAsciiStringStack(start.Block.Array, start.Index, length);
+            }
+
+            return GetAsciiStringStack(start.Block, end, start.Index, length);
+        }
+
+        /// <summary>
+        /// Decodes the bytes between <paramref name="start"/> and <paramref name="end"/> as UTF-8.
+        /// </summary>
+        /// <returns>The decoded string, or null when either iterator is default.</returns>
+        public static string GetUtf8String(this MemoryPoolIterator2 start, MemoryPoolIterator2 end)
+        {
+            if (start.IsDefault || end.IsDefault)
+            {
+                return default(string);
+            }
+            if (end.Block == start.Block)
+            {
+                return _utf8.GetString(start.Block.Array, start.Index, end.Index - start.Index);
+            }
+
+            var decoder = _utf8.GetDecoder();
+
+            var length = start.GetLength(end);
+            var charLength = length * 2;
+            var chars = new char[charLength];
+            var charIndex = 0;
+
+            var block = start.Block;
+            var index = start.Index;
+            var remaining = length;
+            while (true)
+            {
+                int bytesUsed;
+                int charsUsed;
+                bool completed;
+                var following = block.End - index;
+                if (remaining <= following)
+                {
+                    decoder.Convert(
+                        block.Array,
+                        index,
+                        remaining,
+                        chars,
+                        charIndex,
+                        charLength - charIndex,
+                        true,
+                        out bytesUsed,
+                        out charsUsed,
+                        out completed);
+                    return new string(chars, 0, charIndex + charsUsed);
+                }
+                else if (block.Next == null)
+                {
+                    decoder.Convert(
+                        block.Array,
+                        index,
+                        following,
+                        chars,
+                        charIndex,
+                        charLength - charIndex,
+                        true,
+                        out bytesUsed,
+                        out charsUsed,
+                        out completed);
+                    return new string(chars, 0, charIndex + charsUsed);
+                }
+                else
+                {
+                    decoder.Convert(
+                        block.Array,
+                        index,
+                        following,
+                        chars,
+                        charIndex,
+                        charLength - charIndex,
+                        false,
+                        out bytesUsed,
+                        out charsUsed,
+                        out completed);
+                    charIndex += charsUsed;
+                    remaining -= following;
+                    block = block.Next;
+                    index = block.Start;
+                }
+            }
+        }
+
+        /// <summary>
+        /// Returns the bytes between <paramref name="start"/> and <paramref name="end"/>; a single-block range
+        /// is returned as a segment over the block's own array, a multi-block range is copied into a new array.
+        /// </summary>
+        /// <returns>The segment, or the default segment when either iterator is default.</returns>
+        public static ArraySegment<byte> GetArraySegment(this MemoryPoolIterator2 start, MemoryPoolIterator2 end)
+        {
+            if (start.IsDefault || end.IsDefault)
+            {
+                return default(ArraySegment<byte>);
+            }
+            if (end.Block == start.Block)
+            {
+                return new ArraySegment<byte>(start.Block.Array, start.Index, end.Index - start.Index);
+            }
+
+            var length = start.GetLength(end);
+            var array = new byte[length];
+            start.CopyTo(array, 0, length, out length);
+            return new ArraySegment<byte>(array, 0, length);
+        }
+    }
+}
diff --git a/test/Microsoft.AspNet.Server.KestrelTests/AsciiDecoder.cs b/test/Microsoft.AspNet.Server.KestrelTests/AsciiDecoder.cs
new file mode 100644
index 0000000000..26c034d2d5
--- /dev/null
+++ b/test/Microsoft.AspNet.Server.KestrelTests/AsciiDecoder.cs
@@ -0,0 +1,120 @@
+// Copyright (c) .NET Foundation. All rights reserved.
+// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
+
+using System;
+using System.Linq;
+using Microsoft.AspNet.Server.Kestrel.Infrastructure;
+using Xunit;
+
+namespace Microsoft.AspNet.Server.KestrelTests
+{
+    public class AsciiDecoderTests
+    {
+        // Every byte value (0-255) must come back as the char with the same numeric value.
+        [Fact]
+        private void FullByteRangeSupported()
+        {
+            var byteRange = Enumerable.Range(0, 256).Select(x => (byte)x).ToArray();
+
+            var mem = MemoryPoolBlock2.Create(new ArraySegment<byte>(byteRange), IntPtr.Zero, null, null);
+            mem.End = byteRange.Length;
+
+            var begin = mem.GetIterator();
+            var end = GetIterator(begin, byteRange.Length);
+
+            var s = begin.GetAsciiString(end);
+
+            Assert.Equal(byteRange.Length, s.Length);
+
+            for (var i = 0; i < byteRange.Length; i++)
+            {
+                var sb = (byte)s[i];
+                var b = byteRange[i];
+
+                Assert.Equal(b, sb);
+            }
+        }
+
+        // A range spanning four chained blocks must be decoded in order across the block boundaries.
+        [Fact]
+        private void MultiBlockProducesCorrectResults()
+        {
+            var byteRange = Enumerable.Range(0, 512 + 64).Select(x => (byte)x).ToArray();
+            var expectedByteRange = byteRange
+                .Concat(byteRange)
+                .Concat(byteRange)
+                .Concat(byteRange)
+                .ToArray();
+
+            var mem0 = MemoryPoolBlock2.Create(new ArraySegment<byte>(byteRange), IntPtr.Zero, null, null);
+            var mem1 = MemoryPoolBlock2.Create(new ArraySegment<byte>(byteRange), IntPtr.Zero, null, null);
+            var mem2 = MemoryPoolBlock2.Create(new ArraySegment<byte>(byteRange), IntPtr.Zero, null, null);
+            var mem3 = MemoryPoolBlock2.Create(new ArraySegment<byte>(byteRange), IntPtr.Zero, null, null);
+            mem0.End = byteRange.Length;
+            mem1.End = byteRange.Length;
+            mem2.End = byteRange.Length;
+            mem3.End = byteRange.Length;
+
+            mem0.Next = mem1;
+            mem1.Next = mem2;
+            mem2.Next = mem3;
+
+            var begin = mem0.GetIterator();
+            var end = GetIterator(begin, expectedByteRange.Length);
+
+            var s = begin.GetAsciiString(end);
+
+            Assert.Equal(expectedByteRange.Length, s.Length);
+
+            for (var i = 0; i < expectedByteRange.Length; i++)
+            {
+                var sb = (byte)s[i];
+                var b = expectedByteRange[i];
+
+                Assert.Equal(b, sb);
+            }
+        }
+
+        // A two-block range longer than the 16384-char stack limit must be decoded correctly (heap-buffer path).
+        [Fact]
+        private void HeapAllocationProducesCorrectResults()
+        {
+            var byteRange = Enumerable.Range(0, 16384 + 64).Select(x => (byte)x).ToArray();
+            var expectedByteRange = byteRange.Concat(byteRange).ToArray();
+
+            var mem0 = MemoryPoolBlock2.Create(new ArraySegment<byte>(byteRange), IntPtr.Zero, null, null);
+            var mem1 = MemoryPoolBlock2.Create(new ArraySegment<byte>(byteRange), IntPtr.Zero, null, null);
+            mem0.End = byteRange.Length;
+            mem1.End = byteRange.Length;
+
+            mem0.Next = mem1;
+
+            var begin = mem0.GetIterator();
+            var end = GetIterator(begin, expectedByteRange.Length);
+
+            var s = begin.GetAsciiString(end);
+
+            Assert.Equal(expectedByteRange.Length, s.Length);
+
+            for (var i = 0; i < expectedByteRange.Length; i++)
+            {
+                var sb = (byte)s[i];
+                var b = expectedByteRange[i];
+
+                Assert.Equal(b, sb);
+            }
+        }
+
+        // Returns an iterator advanced from begin by displacement calls to Take().
+        private MemoryPoolIterator2 GetIterator(MemoryPoolIterator2 begin, int displacement)
+        {
+            var result = begin;
+            for (int i = 0; i < displacement; ++i)
+            {
+                result.Take();
+            }
+
+            return result;
+        }
+    }
+}
diff --git a/test/Microsoft.AspNet.Server.KestrelTests/UrlPathDecoder.cs b/test/Microsoft.AspNet.Server.KestrelTests/UrlPathDecoder.cs
index 928fc84a05..02769fd01f 100644
--- a/test/Microsoft.AspNet.Server.KestrelTests/UrlPathDecoder.cs
+++ b/test/Microsoft.AspNet.Server.KestrelTests/UrlPathDecoder.cs
@@ -115,7 +115,7 @@ namespace Microsoft.AspNet.Server.KestrelTests
             var end = GetIterator(begin, rawLength);

             var end2 = UrlPathDecoder.Unescape(begin, end);
-            var result = begin.GetString(end2);
+            var result = begin.GetUtf8String(end2);

             Assert.Equal(expectLength, result.Length);
             Assert.Equal(expect, result);
@@ -147,7 +147,7 @@ namespace Microsoft.AspNet.Server.KestrelTests
             var end = GetIterator(begin, raw.Length);

             var result = UrlPathDecoder.Unescape(begin, end);
-            Assert.Equal(expect, begin.GetString(result));
+            Assert.Equal(expect, begin.GetUtf8String(result));
         }

         private void PositiveAssert(string raw)
@@ -156,7 +156,7 @@ namespace Microsoft.AspNet.Server.KestrelTests
             var end = GetIterator(begin, raw.Length);

             var result = UrlPathDecoder.Unescape(begin, end);
-            Assert.NotEqual(raw.Length, begin.GetString(result).Length);
+            Assert.NotEqual(raw.Length, begin.GetUtf8String(result).Length);
         }

         private void NegativeAssert(string raw)
@@ -165,7 +165,7 @@ namespace Microsoft.AspNet.Server.KestrelTests
             var end = GetIterator(begin, raw.Length);

             var resultEnd = UrlPathDecoder.Unescape(begin, end);
-            var result = begin.GetString(resultEnd);
+            var result = begin.GetUtf8String(resultEnd);
             Assert.Equal(raw, result);
         }
     }