diff --git a/benchmarks/Kestrel.Performance/AsciiBytesToStringBenchmark.cs b/benchmarks/Kestrel.Performance/AsciiBytesToStringBenchmark.cs deleted file mode 100644 index 2f9c2423f5..0000000000 --- a/benchmarks/Kestrel.Performance/AsciiBytesToStringBenchmark.cs +++ /dev/null @@ -1,623 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using BenchmarkDotNet.Attributes; -using System; -using System.Collections.Generic; -using System.Numerics; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; -using System.Text; - -namespace Microsoft.AspNetCore.Server.Kestrel.Performance -{ - public class AsciiBytesToStringBenchmark - { - private const int Iterations = 100; - - private byte[] _asciiBytes; - private string _asciiString = new string('\0', 1024); - - [Params( - BenchmarkTypes.KeepAlive, - BenchmarkTypes.Accept, - BenchmarkTypes.UserAgent, - BenchmarkTypes.Cookie - )] - public BenchmarkTypes Type { get; set; } - - [GlobalSetup] - public void Setup() - { - switch (Type) - { - case BenchmarkTypes.KeepAlive: - _asciiBytes = Encoding.ASCII.GetBytes("keep-alive"); - break; - case BenchmarkTypes.Accept: - _asciiBytes = Encoding.ASCII.GetBytes("text/plain,text/html;q=0.9,application/xhtml+xml;q=0.9,application/xml;q=0.8,*/*;q=0.7"); - break; - case BenchmarkTypes.UserAgent: - _asciiBytes = Encoding.ASCII.GetBytes("Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36"); - break; - case BenchmarkTypes.Cookie: - _asciiBytes = Encoding.ASCII.GetBytes("prov=20629ccd-8b0f-e8ef-2935-cd26609fc0bc; __qca=P0-1591065732-1479167353442; _ga=GA1.2.1298898376.1479167354; _gat=1; sgt=id=9519gfde_3347_4762_8762_df51458c8ec2; acct=t=why-is-%e0%a5%a7%e0%a5%a8%e0%a5%a9-numeric&s=why-is-%e0%a5%a7%e0%a5%a8%e0%a5%a9-numeric"); - break; - } - - Verify(); - } - - 
[Benchmark(OperationsPerInvoke = Iterations)] - public unsafe string EncodingAsciiGetChars() - { - for (uint i = 0; i < Iterations; i++) - { - fixed (byte* pBytes = &_asciiBytes[0]) - fixed (char* pString = _asciiString) - { - Encoding.ASCII.GetChars(pBytes, _asciiBytes.Length, pString, _asciiBytes.Length); - } - } - - return _asciiString; - } - - [Benchmark(Baseline = true, OperationsPerInvoke = Iterations)] - public unsafe byte[] KestrelBytesToString() - { - for (uint i = 0; i < Iterations; i++) - { - fixed (byte* pBytes = &_asciiBytes[0]) - fixed (char* pString = _asciiString) - { - TryGetAsciiString(pBytes, pString, _asciiBytes.Length); - } - } - - return _asciiBytes; - } - - [Benchmark(OperationsPerInvoke = Iterations)] - public unsafe byte[] AsciiBytesToStringVectorCheck() - { - for (uint i = 0; i < Iterations; i++) - { - fixed (byte* pBytes = &_asciiBytes[0]) - fixed (char* pString = _asciiString) - { - TryGetAsciiStringVectorCheck(pBytes, pString, _asciiBytes.Length); - } - } - - return _asciiBytes; - } - - [Benchmark(OperationsPerInvoke = Iterations)] - public unsafe byte[] AsciiBytesToStringVectorWiden() - { - // Widen Acceleration is post netcoreapp2.0 - for (uint i = 0; i < Iterations; i++) - { - fixed (byte* pBytes = &_asciiBytes[0]) - fixed (char* pString = _asciiString) - { - TryGetAsciiStringVectorWiden(pBytes, pString, _asciiBytes.Length); - } - } - - return _asciiBytes; - } - - [Benchmark(OperationsPerInvoke = Iterations)] - public unsafe byte[] AsciiBytesToStringSpanWiden() - { - // Widen Acceleration is post netcoreapp2.0 - for (uint i = 0; i < Iterations; i++) - { - fixed (char* pString = _asciiString) - { - TryGetAsciiStringWidenSpan(_asciiBytes, new Span(pString, _asciiString.Length)); - } - } - - return _asciiBytes; - } - - public static bool TryGetAsciiStringWidenSpan(ReadOnlySpan input, Span output) - { - // Start as valid - var isValid = true; - - do - { - // If Vector not-accelerated or remaining less than vector size - if 
(!Vector.IsHardwareAccelerated || input.Length < Vector.Count) - { - if (IntPtr.Size == 8) // Use Intrinsic switch for branch elimination - { - // 64-bit: Loop longs by default - while ((uint)sizeof(long) <= (uint)input.Length) - { - isValid &= CheckBytesInAsciiRange(MemoryMarshal.Cast(input)[0]); - - output[0] = (char)input[0]; - output[1] = (char)input[1]; - output[2] = (char)input[2]; - output[3] = (char)input[3]; - output[4] = (char)input[4]; - output[5] = (char)input[5]; - output[6] = (char)input[6]; - output[7] = (char)input[7]; - - input = input.Slice(sizeof(long)); - output = output.Slice(sizeof(long)); - } - if ((uint)sizeof(int) <= (uint)input.Length) - { - isValid &= CheckBytesInAsciiRange(MemoryMarshal.Cast(input)[0]); - - output[0] = (char)input[0]; - output[1] = (char)input[1]; - output[2] = (char)input[2]; - output[3] = (char)input[3]; - - input = input.Slice(sizeof(int)); - output = output.Slice(sizeof(int)); - } - } - else - { - // 32-bit: Loop ints by default - while ((uint)sizeof(int) <= (uint)input.Length) - { - isValid &= CheckBytesInAsciiRange(MemoryMarshal.Cast(input)[0]); - - output[0] = (char)input[0]; - output[1] = (char)input[1]; - output[2] = (char)input[2]; - output[3] = (char)input[3]; - - input = input.Slice(sizeof(int)); - output = output.Slice(sizeof(int)); - } - } - if ((uint)sizeof(short) <= (uint)input.Length) - { - isValid &= CheckBytesInAsciiRange(MemoryMarshal.Cast(input)[0]); - - output[0] = (char)input[0]; - output[1] = (char)input[1]; - - input = input.Slice(sizeof(short)); - output = output.Slice(sizeof(short)); - } - if ((uint)sizeof(byte) <= (uint)input.Length) - { - isValid &= CheckBytesInAsciiRange((sbyte)input[0]); - output[0] = (char)input[0]; - } - - return isValid; - } - - // do/while as entry condition already checked - do - { - var vector = MemoryMarshal.Cast>(input)[0]; - isValid &= CheckBytesInAsciiRange(vector); - Vector.Widen( - vector, - out MemoryMarshal.Cast>(output)[0], - out 
MemoryMarshal.Cast>(output)[1]); - - input = input.Slice(Vector.Count); - output = output.Slice(Vector.Count); - } while (input.Length >= Vector.Count); - - // Vector path done, loop back to do non-Vector - // If is a exact multiple of vector size, bail now - } while (input.Length > 0); - - return isValid; - } - - public static unsafe bool TryGetAsciiStringVectorWiden(byte* input, char* output, int count) - { - // Calculate end position - var end = input + count; - // Start as valid - var isValid = true; - - do - { - // If Vector not-accelerated or remaining less than vector size - if (!Vector.IsHardwareAccelerated || input > end - Vector.Count) - { - if (IntPtr.Size == 8) // Use Intrinsic switch for branch elimination - { - // 64-bit: Loop longs by default - while (input <= end - sizeof(long)) - { - isValid &= CheckBytesInAsciiRange(((long*)input)[0]); - - output[0] = (char)input[0]; - output[1] = (char)input[1]; - output[2] = (char)input[2]; - output[3] = (char)input[3]; - output[4] = (char)input[4]; - output[5] = (char)input[5]; - output[6] = (char)input[6]; - output[7] = (char)input[7]; - - input += sizeof(long); - output += sizeof(long); - } - if (input <= end - sizeof(int)) - { - isValid &= CheckBytesInAsciiRange(((int*)input)[0]); - - output[0] = (char)input[0]; - output[1] = (char)input[1]; - output[2] = (char)input[2]; - output[3] = (char)input[3]; - - input += sizeof(int); - output += sizeof(int); - } - } - else - { - // 32-bit: Loop ints by default - while (input <= end - sizeof(int)) - { - isValid &= CheckBytesInAsciiRange(((int*)input)[0]); - - output[0] = (char)input[0]; - output[1] = (char)input[1]; - output[2] = (char)input[2]; - output[3] = (char)input[3]; - - input += sizeof(int); - output += sizeof(int); - } - } - if (input <= end - sizeof(short)) - { - isValid &= CheckBytesInAsciiRange(((short*)input)[0]); - - output[0] = (char)input[0]; - output[1] = (char)input[1]; - - input += sizeof(short); - output += sizeof(short); - } - if (input < end) - 
{ - isValid &= CheckBytesInAsciiRange(((sbyte*)input)[0]); - output[0] = (char)input[0]; - } - - return isValid; - } - - // do/while as entry condition already checked - do - { - var vector = Unsafe.AsRef>(input); - isValid &= CheckBytesInAsciiRange(vector); - Vector.Widen( - vector, - out Unsafe.AsRef>(output), - out Unsafe.AsRef>(output + Vector.Count)); - - input += Vector.Count; - output += Vector.Count; - } while (input <= end - Vector.Count); - - // Vector path done, loop back to do non-Vector - // If is a exact multiple of vector size, bail now - } while (input < end); - - return isValid; - } - - public static unsafe bool TryGetAsciiStringVectorCheck(byte* input, char* output, int count) - { - // Calculate end position - var end = input + count; - // Start as valid - var isValid = true; - do - { - // If Vector not-accelerated or remaining less than vector size - if (!Vector.IsHardwareAccelerated || input > end - Vector.Count) - { - if (IntPtr.Size == 8) // Use Intrinsic switch for branch elimination - { - // 64-bit: Loop longs by default - while (input <= end - sizeof(long)) - { - isValid &= CheckBytesInAsciiRange(((long*)input)[0]); - - output[0] = (char)input[0]; - output[1] = (char)input[1]; - output[2] = (char)input[2]; - output[3] = (char)input[3]; - output[4] = (char)input[4]; - output[5] = (char)input[5]; - output[6] = (char)input[6]; - output[7] = (char)input[7]; - - input += sizeof(long); - output += sizeof(long); - } - if (input <= end - sizeof(int)) - { - isValid &= CheckBytesInAsciiRange(((int*)input)[0]); - - output[0] = (char)input[0]; - output[1] = (char)input[1]; - output[2] = (char)input[2]; - output[3] = (char)input[3]; - - input += sizeof(int); - output += sizeof(int); - } - } - else - { - // 32-bit: Loop ints by default - while (input <= end - sizeof(int)) - { - isValid &= CheckBytesInAsciiRange(((int*)input)[0]); - - output[0] = (char)input[0]; - output[1] = (char)input[1]; - output[2] = (char)input[2]; - output[3] = (char)input[3]; - - 
input += sizeof(int); - output += sizeof(int); - } - } - if (input <= end - sizeof(short)) - { - isValid &= CheckBytesInAsciiRange(((short*)input)[0]); - - output[0] = (char)input[0]; - output[1] = (char)input[1]; - - input += sizeof(short); - output += sizeof(short); - } - if (input < end) - { - isValid &= CheckBytesInAsciiRange(((sbyte*)input)[0]); - output[0] = (char)input[0]; - } - - return isValid; - } - - // do/while as entry condition already checked - do - { - isValid &= CheckBytesInAsciiRange(Unsafe.AsRef>(input)); - - // Vector.Widen is only netcoreapp2.1+ so let's do this manually - var i = 0; - do - { - // Vectors are min 16 byte, so lets do 16 byte loops - i += 16; - // Unrolled byte-wise widen - output[0] = (char)input[0]; - output[1] = (char)input[1]; - output[2] = (char)input[2]; - output[3] = (char)input[3]; - output[4] = (char)input[4]; - output[5] = (char)input[5]; - output[6] = (char)input[6]; - output[7] = (char)input[7]; - output[8] = (char)input[8]; - output[9] = (char)input[9]; - output[10] = (char)input[10]; - output[11] = (char)input[11]; - output[12] = (char)input[12]; - output[13] = (char)input[13]; - output[14] = (char)input[14]; - output[15] = (char)input[15]; - - input += 16; - output += 16; - } while (i < Vector.Count); - } while (input <= end - Vector.Count); - - // Vector path done, loop back to do non-Vector - // If is a exact multiple of vector size, bail now - } while (input < end); - - return isValid; - } - - public static unsafe bool TryGetAsciiString(byte* input, char* output, int count) - { - var i = 0; - sbyte* signedInput = (sbyte*)input; - - bool isValid = true; - while (i < count - 11) - { - isValid = isValid && *signedInput > 0 && *(signedInput + 1) > 0 && *(signedInput + 2) > 0 && - *(signedInput + 3) > 0 && *(signedInput + 4) > 0 && *(signedInput + 5) > 0 && *(signedInput + 6) > 0 && - *(signedInput + 7) > 0 && *(signedInput + 8) > 0 && *(signedInput + 9) > 0 && *(signedInput + 10) > 0 && - *(signedInput + 11) > 0; - 
- i += 12; - *(output) = (char)*(signedInput); - *(output + 1) = (char)*(signedInput + 1); - *(output + 2) = (char)*(signedInput + 2); - *(output + 3) = (char)*(signedInput + 3); - *(output + 4) = (char)*(signedInput + 4); - *(output + 5) = (char)*(signedInput + 5); - *(output + 6) = (char)*(signedInput + 6); - *(output + 7) = (char)*(signedInput + 7); - *(output + 8) = (char)*(signedInput + 8); - *(output + 9) = (char)*(signedInput + 9); - *(output + 10) = (char)*(signedInput + 10); - *(output + 11) = (char)*(signedInput + 11); - output += 12; - signedInput += 12; - } - if (i < count - 5) - { - isValid = isValid && *signedInput > 0 && *(signedInput + 1) > 0 && *(signedInput + 2) > 0 && - *(signedInput + 3) > 0 && *(signedInput + 4) > 0 && *(signedInput + 5) > 0; - - i += 6; - *(output) = (char)*(signedInput); - *(output + 1) = (char)*(signedInput + 1); - *(output + 2) = (char)*(signedInput + 2); - *(output + 3) = (char)*(signedInput + 3); - *(output + 4) = (char)*(signedInput + 4); - *(output + 5) = (char)*(signedInput + 5); - output += 6; - signedInput += 6; - } - if (i < count - 3) - { - isValid = isValid && *signedInput > 0 && *(signedInput + 1) > 0 && *(signedInput + 2) > 0 && - *(signedInput + 3) > 0; - - i += 4; - *(output) = (char)*(signedInput); - *(output + 1) = (char)*(signedInput + 1); - *(output + 2) = (char)*(signedInput + 2); - *(output + 3) = (char)*(signedInput + 3); - output += 4; - signedInput += 4; - } - - while (i < count) - { - isValid = isValid && *signedInput > 0; - - i++; - *output = (char)*signedInput; - output++; - signedInput++; - } - - return isValid; - } - - private static bool CheckBytesInAsciiRange(Vector check) - { - // Vectorized byte range check, signed byte > 0 for 1-127 - return Vector.GreaterThanAll(check, Vector.Zero); - } - - // Validate: bytes != 0 && bytes <= 127 - // Subtract 1 from all bytes to move 0 to high bits - // bitwise or with self to catch all > 127 bytes - // mask off high bits and check if 0 - - 
[MethodImpl(MethodImplOptions.AggressiveInlining)] // Needs a push - private static bool CheckBytesInAsciiRange(long check) - { - const long HighBits = unchecked((long)0x8080808080808080L); - return (((check - 0x0101010101010101L) | check) & HighBits) == 0; - } - - private static bool CheckBytesInAsciiRange(int check) - { - const int HighBits = unchecked((int)0x80808080); - return (((check - 0x01010101) | check) & HighBits) == 0; - } - - private static bool CheckBytesInAsciiRange(short check) - { - const short HighBits = unchecked((short)0x8080); - return (((short)(check - 0x0101) | check) & HighBits) == 0; - } - - private static bool CheckBytesInAsciiRange(sbyte check) - => check > 0; - - private void Verify() - { - var verification = EncodingAsciiGetChars().Substring(0, _asciiBytes.Length); - - BlankString('\0'); - EncodingAsciiGetChars(); - VerifyString(verification, '\0'); - BlankString(' '); - EncodingAsciiGetChars(); - VerifyString(verification, ' '); - - BlankString('\0'); - KestrelBytesToString(); - VerifyString(verification, '\0'); - BlankString(' '); - KestrelBytesToString(); - VerifyString(verification, ' '); - - BlankString('\0'); - AsciiBytesToStringVectorCheck(); - VerifyString(verification, '\0'); - BlankString(' '); - AsciiBytesToStringVectorCheck(); - VerifyString(verification, ' '); - - BlankString('\0'); - AsciiBytesToStringVectorWiden(); - VerifyString(verification, '\0'); - BlankString(' '); - AsciiBytesToStringVectorWiden(); - VerifyString(verification, ' '); - - BlankString('\0'); - AsciiBytesToStringSpanWiden(); - VerifyString(verification, '\0'); - BlankString(' '); - AsciiBytesToStringSpanWiden(); - VerifyString(verification, ' '); - } - - private unsafe void BlankString(char ch) - { - fixed (char* pString = _asciiString) - { - for (var i = 0; i < _asciiString.Length; i++) - { - *(pString + i) = ch; - } - } - } - - private unsafe void VerifyString(string verification, char ch) - { - fixed (char* pString = _asciiString) - { - var i = 0; - 
for (; i < verification.Length; i++) - { - if (*(pString + i) != verification[i]) throw new Exception($"Verify failed, saw {(int)*(pString + i)} expected {(int)verification[i]} at position {i}"); - } - for (; i < _asciiString.Length; i++) - { - if (*(pString + i) != ch) throw new Exception($"Verify failed, saw {(int)*(pString + i)} expected {(int)ch} at position {i}"); ; - } - } - } - - public enum BenchmarkTypes - { - KeepAlive, - Accept, - UserAgent, - Cookie, - } - } -} diff --git a/benchmarks/Kestrel.Performance/BytesToStringBenchmark.cs b/benchmarks/Kestrel.Performance/BytesToStringBenchmark.cs new file mode 100644 index 0000000000..28f365d7da --- /dev/null +++ b/benchmarks/Kestrel.Performance/BytesToStringBenchmark.cs @@ -0,0 +1,82 @@ +// Copyright (c) .NET Foundation. All rights reserved. +// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. + +using BenchmarkDotNet.Attributes; +using Microsoft.AspNetCore.Server.Kestrel.Core.Internal.Infrastructure; + +namespace Microsoft.AspNetCore.Server.Kestrel.Performance +{ + public class BytesToStringBenchmark + { + private const int Iterations = 50; + + private byte[] _asciiBytes; + private byte[] _utf8Bytes; + + [Params( + BenchmarkTypes.KeepAlive, + BenchmarkTypes.Accept, + BenchmarkTypes.UserAgent, + BenchmarkTypes.Cookie + )] + public BenchmarkTypes Type { get; set; } + + [GlobalSetup] + public void Setup() + { + switch (Type) + { + case BenchmarkTypes.KeepAlive: + // keep-alive + _asciiBytes = new byte[] { 0x6b, 0x65, 0x65, 0x70, 0x2d, 0x61, 0x6c, 0x69, 0x76, 0x65 }; + // kéép-álivé + _utf8Bytes = new byte[] { 0x6b, 0xc3, 0xa9, 0xc3, 0xa9, 0x70, 0x2d, 0xc3, 0xa1, 0x6c, 0x69, 0x76, 0xc3, 0xa9 }; + break; + case BenchmarkTypes.Accept: + // text/plain,text/html;q=0.9,application/xhtml+xml;q=0.9,application/xml;q=0.8,*/*;q=0.7 + _asciiBytes = new byte[] { 0x74, 0x65, 0x78, 0x74, 0x2f, 0x70, 0x6c, 0x61, 0x69, 0x6e, 0x2c, 0x74, 0x65, 0x78, 0x74, 0x2f, 0x68, 
0x74, 0x6d, 0x6c, 0x3b, 0x71, 0x3d, 0x30, 0x2e, 0x39, 0x2c, 0x61, 0x70, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x2f, 0x78, 0x68, 0x74, 0x6d, 0x6c, 0x2b, 0x78, 0x6d, 0x6c, 0x3b, 0x71, 0x3d, 0x30, 0x2e, 0x39, 0x2c, 0x61, 0x70, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x2f, 0x78, 0x6d, 0x6c, 0x3b, 0x71, 0x3d, 0x30, 0x2e, 0x38, 0x2c, 0x2a, 0x2f, 0x2a, 0x3b, 0x71, 0x3d, 0x30, 0x2e, 0x37 }; + // téxt/pláin,téxt/html;q=0.9,ápplicátion/xhtml+xml;q=0.9,ápplicátion/xml;q=0.8,*/*;q=0.7 + _utf8Bytes = new byte[] { 0x74, 0xc3, 0xa9, 0x78, 0x74, 0x2f, 0x70, 0x6c, 0xc3, 0xa1, 0x69, 0x6e, 0x2c, 0x74, 0xc3, 0xa9, 0x78, 0x74, 0x2f, 0x68, 0x74, 0x6d, 0x6c, 0x3b, 0x71, 0x3d, 0x30, 0x2e, 0x39, 0x2c, 0xc3, 0xa1, 0x70, 0x70, 0x6c, 0x69, 0x63, 0xc3, 0xa1, 0x74, 0x69, 0x6f, 0x6e, 0x2f, 0x78, 0x68, 0x74, 0x6d, 0x6c, 0x2b, 0x78, 0x6d, 0x6c, 0x3b, 0x71, 0x3d, 0x30, 0x2e, 0x39, 0x2c, 0xc3, 0xa1, 0x70, 0x70, 0x6c, 0x69, 0x63, 0xc3, 0xa1, 0x74, 0x69, 0x6f, 0x6e, 0x2f, 0x78, 0x6d, 0x6c, 0x3b, 0x71, 0x3d, 0x30, 0x2e, 0x38, 0x2c, 0x2a, 0x2f, 0x2a, 0x3b, 0x71, 0x3d, 0x30, 0x2e, 0x37 }; + break; + case BenchmarkTypes.UserAgent: + // Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36 + _asciiBytes = new byte[] { 0x4d, 0x6f, 0x7a, 0x69, 0x6c, 0x6c, 0x61, 0x2f, 0x35, 0x2e, 0x30, 0x20, 0x28, 0x57, 0x69, 0x6e, 0x64, 0x6f, 0x77, 0x73, 0x20, 0x4e, 0x54, 0x20, 0x31, 0x30, 0x2e, 0x30, 0x3b, 0x20, 0x57, 0x4f, 0x57, 0x36, 0x34, 0x29, 0x20, 0x41, 0x70, 0x70, 0x6c, 0x65, 0x57, 0x65, 0x62, 0x4b, 0x69, 0x74, 0x2f, 0x35, 0x33, 0x37, 0x2e, 0x33, 0x36, 0x20, 0x28, 0x4b, 0x48, 0x54, 0x4d, 0x4c, 0x2c, 0x20, 0x6c, 0x69, 0x6b, 0x65, 0x20, 0x47, 0x65, 0x63, 0x6b, 0x6f, 0x29, 0x20, 0x43, 0x68, 0x72, 0x6f, 0x6d, 0x65, 0x2f, 0x35, 0x34, 0x2e, 0x30, 0x2e, 0x32, 0x38, 0x34, 0x30, 0x2e, 0x39, 0x39, 0x20, 0x53, 0x61, 0x66, 0x61, 0x72, 0x69, 0x2f, 0x35, 0x33, 0x37, 0x2e, 0x33, 0x36 }; + // Mozillá/5.0 (Windows NT 10.0; WOW64) áppléWébKit/537.36 
(KHTML, liké Gécko) Chromé/54.0.2840.99 Sáfári/537.36 + _utf8Bytes = new byte[] { 0x4d, 0x6f, 0x7a, 0x69, 0x6c, 0x6c, 0xc3, 0xa1, 0x2f, 0x35, 0x2e, 0x30, 0x20, 0x28, 0x57, 0x69, 0x6e, 0x64, 0x6f, 0x77, 0x73, 0x20, 0x4e, 0x54, 0x20, 0x31, 0x30, 0x2e, 0x30, 0x3b, 0x20, 0x57, 0x4f, 0x57, 0x36, 0x34, 0x29, 0x20, 0xc3, 0xa1, 0x70, 0x70, 0x6c, 0xc3, 0xa9, 0x57, 0xc3, 0xa9, 0x62, 0x4b, 0x69, 0x74, 0x2f, 0x35, 0x33, 0x37, 0x2e, 0x33, 0x36, 0x20, 0x28, 0x4b, 0x48, 0x54, 0x4d, 0x4c, 0x2c, 0x20, 0x6c, 0x69, 0x6b, 0xc3, 0xa9, 0x20, 0x47, 0xc3, 0xa9, 0x63, 0x6b, 0x6f, 0x29, 0x20, 0x43, 0x68, 0x72, 0x6f, 0x6d, 0xc3, 0xa9, 0x2f, 0x35, 0x34, 0x2e, 0x30, 0x2e, 0x32, 0x38, 0x34, 0x30, 0x2e, 0x39, 0x39, 0x20, 0x53, 0xc3, 0xa1, 0x66, 0xc3, 0xa1, 0x72, 0x69, 0x2f, 0x35, 0x33, 0x37, 0x2e, 0x33, 0x36 }; + break; + case BenchmarkTypes.Cookie: + // prov=20629ccd-8b0f-e8ef-2935-cd26609fc0bc; __qca=P0-1591065732-1479167353442; _ga=GA1.2.1298898376.1479167354; _gat=1; sgt=id=9519gfde_3347_4762_8762_df51458c8ec2; acct=t=why-is-%e0%a5%a7%e0%a5%a8%e0%a5%a9-numeric&s=why-is-%e0%a5%a7%e0%a5%a8%e0%a5%a9-numeric + _asciiBytes = new byte[] { 0x70, 0x72, 0x6f, 0x76, 0x3d, 0x32, 0x30, 0x36, 0x32, 0x39, 0x63, 0x63, 0x64, 0x2d, 0x38, 0x62, 0x30, 0x66, 0x2d, 0x65, 0x38, 0x65, 0x66, 0x2d, 0x32, 0x39, 0x33, 0x35, 0x2d, 0x63, 0x64, 0x32, 0x36, 0x36, 0x30, 0x39, 0x66, 0x63, 0x30, 0x62, 0x63, 0x3b, 0x20, 0x5f, 0x5f, 0x71, 0x63, 0x61, 0x3d, 0x50, 0x30, 0x2d, 0x31, 0x35, 0x39, 0x31, 0x30, 0x36, 0x35, 0x37, 0x33, 0x32, 0x2d, 0x31, 0x34, 0x37, 0x39, 0x31, 0x36, 0x37, 0x33, 0x35, 0x33, 0x34, 0x34, 0x32, 0x3b, 0x20, 0x5f, 0x67, 0x61, 0x3d, 0x47, 0x41, 0x31, 0x2e, 0x32, 0x2e, 0x31, 0x32, 0x39, 0x38, 0x38, 0x39, 0x38, 0x33, 0x37, 0x36, 0x2e, 0x31, 0x34, 0x37, 0x39, 0x31, 0x36, 0x37, 0x33, 0x35, 0x34, 0x3b, 0x20, 0x5f, 0x67, 0x61, 0x74, 0x3d, 0x31, 0x3b, 0x20, 0x73, 0x67, 0x74, 0x3d, 0x69, 0x64, 0x3d, 0x39, 0x35, 0x31, 0x39, 0x67, 0x66, 0x64, 0x65, 0x5f, 0x33, 0x33, 0x34, 0x37, 0x5f, 0x34, 0x37, 0x36, 0x32, 0x5f, 0x38, 
0x37, 0x36, 0x32, 0x5f, 0x64, 0x66, 0x35, 0x31, 0x34, 0x35, 0x38, 0x63, 0x38, 0x65, 0x63, 0x32, 0x3b, 0x20, 0x61, 0x63, 0x63, 0x74, 0x3d, 0x74, 0x3d, 0x77, 0x68, 0x79, 0x2d, 0x69, 0x73, 0x2d, 0x25, 0x65, 0x30, 0x25, 0x61, 0x35, 0x25, 0x61, 0x37, 0x25, 0x65, 0x30, 0x25, 0x61, 0x35, 0x25, 0x61, 0x38, 0x25, 0x65, 0x30, 0x25, 0x61, 0x35, 0x25, 0x61, 0x39, 0x2d, 0x6e, 0x75, 0x6d, 0x65, 0x72, 0x69, 0x63, 0x26, 0x73, 0x3d, 0x77, 0x68, 0x79, 0x2d, 0x69, 0x73, 0x2d, 0x25, 0x65, 0x30, 0x25, 0x61, 0x35, 0x25, 0x61, 0x37, 0x25, 0x65, 0x30, 0x25, 0x61, 0x35, 0x25, 0x61, 0x38, 0x25, 0x65, 0x30, 0x25, 0x61, 0x35, 0x25, 0x61, 0x39, 0x2d, 0x6e, 0x75, 0x6d, 0x65, 0x72, 0x69, 0x63 }; + // prov=20629ccd-8b0f-é8éf-2935-cd26609fc0bc; __qcá=P0-1591065732-1479167353442; _gá=Gá1.2.1298898376.1479167354; _gát=1; sgt=id=9519gfdé_3347_4762_8762_df51458c8éc2; ácct=t=why-is-%é0%á5%á7%é0%á5%á8%é0%á5%á9-numéric&s=why-is-%é0%á5%á7%é0%á5%á8%é0%á5%á9-numéric + _utf8Bytes = new byte[] { 0x70, 0x72, 0x6f, 0x76, 0x3d, 0x32, 0x30, 0x36, 0x32, 0x39, 0x63, 0x63, 0x64, 0x2d, 0x38, 0x62, 0x30, 0x66, 0x2d, 0xc3, 0xa9, 0x38, 0xc3, 0xa9, 0x66, 0x2d, 0x32, 0x39, 0x33, 0x35, 0x2d, 0x63, 0x64, 0x32, 0x36, 0x36, 0x30, 0x39, 0x66, 0x63, 0x30, 0x62, 0x63, 0x3b, 0x20, 0x5f, 0x5f, 0x71, 0x63, 0xc3, 0xa1, 0x3d, 0x50, 0x30, 0x2d, 0x31, 0x35, 0x39, 0x31, 0x30, 0x36, 0x35, 0x37, 0x33, 0x32, 0x2d, 0x31, 0x34, 0x37, 0x39, 0x31, 0x36, 0x37, 0x33, 0x35, 0x33, 0x34, 0x34, 0x32, 0x3b, 0x20, 0x5f, 0x67, 0xc3, 0xa1, 0x3d, 0x47, 0xc3, 0xa1, 0x31, 0x2e, 0x32, 0x2e, 0x31, 0x32, 0x39, 0x38, 0x38, 0x39, 0x38, 0x33, 0x37, 0x36, 0x2e, 0x31, 0x34, 0x37, 0x39, 0x31, 0x36, 0x37, 0x33, 0x35, 0x34, 0x3b, 0x20, 0x5f, 0x67, 0xc3, 0xa1, 0x74, 0x3d, 0x31, 0x3b, 0x20, 0x73, 0x67, 0x74, 0x3d, 0x69, 0x64, 0x3d, 0x39, 0x35, 0x31, 0x39, 0x67, 0x66, 0x64, 0xc3, 0xa9, 0x5f, 0x33, 0x33, 0x34, 0x37, 0x5f, 0x34, 0x37, 0x36, 0x32, 0x5f, 0x38, 0x37, 0x36, 0x32, 0x5f, 0x64, 0x66, 0x35, 0x31, 0x34, 0x35, 0x38, 0x63, 0x38, 0xc3, 0xa9, 0x63, 0x32, 0x3b, 0x20, 
0xc3, 0xa1, 0x63, 0x63, 0x74, 0x3d, 0x74, 0x3d, 0x77, 0x68, 0x79, 0x2d, 0x69, 0x73, 0x2d, 0x25, 0xc3, 0xa9, 0x30, 0x25, 0xc3, 0xa1, 0x35, 0x25, 0xc3, 0xa1, 0x37, 0x25, 0xc3, 0xa9, 0x30, 0x25, 0xc3, 0xa1, 0x35, 0x25, 0xc3, 0xa1, 0x38, 0x25, 0xc3, 0xa9, 0x30, 0x25, 0xc3, 0xa1, 0x35, 0x25, 0xc3, 0xa1, 0x39, 0x2d, 0x6e, 0x75, 0x6d, 0xc3, 0xa9, 0x72, 0x69, 0x63, 0x26, 0x73, 0x3d, 0x77, 0x68, 0x79, 0x2d, 0x69, 0x73, 0x2d, 0x25, 0xc3, 0xa9, 0x30, 0x25, 0xc3, 0xa1, 0x35, 0x25, 0xc3, 0xa1, 0x37, 0x25, 0xc3, 0xa9, 0x30, 0x25, 0xc3, 0xa1, 0x35, 0x25, 0xc3, 0xa1, 0x38, 0x25, 0xc3, 0xa9, 0x30, 0x25, 0xc3, 0xa1, 0x35, 0x25, 0xc3, 0xa1, 0x39, 0x2d, 0x6e, 0x75, 0x6d, 0xc3, 0xa9, 0x72, 0x69, 0x63 }; + break; + } + } + + [Benchmark(Baseline = true, OperationsPerInvoke = Iterations)] + public void AsciiBytesToString() + { + for (uint i = 0; i < Iterations; i++) + { + HttpUtilities.GetAsciiStringNonNullCharacters(_asciiBytes); + } + } + + [Benchmark(OperationsPerInvoke = Iterations)] + public void Utf8BytesToString() + { + for (uint i = 0; i < Iterations; i++) + { + HttpUtilities.GetAsciiOrUTF8StringNonNullCharacters(_utf8Bytes); + } + } + + public enum BenchmarkTypes + { + KeepAlive, + Accept, + UserAgent, + Cookie, + } + } +} diff --git a/src/Kestrel.Core/Internal/Http/HttpProtocol.cs b/src/Kestrel.Core/Internal/Http/HttpProtocol.cs index 5340ee8a0c..0c27877c4a 100644 --- a/src/Kestrel.Core/Internal/Http/HttpProtocol.cs +++ b/src/Kestrel.Core/Internal/Http/HttpProtocol.cs @@ -439,7 +439,7 @@ namespace Microsoft.AspNetCore.Server.Kestrel.Core.Internal.Http { BadHttpRequestException.Throw(RequestRejectionReason.TooManyHeaders); } - var valueString = value.GetAsciiStringNonNullCharacters(); + var valueString = value.GetAsciiOrUTF8StringNonNullCharacters(); HttpRequestHeaders.Append(name, valueString); } diff --git a/src/Kestrel.Core/Internal/Infrastructure/HttpUtilities.cs b/src/Kestrel.Core/Internal/Infrastructure/HttpUtilities.cs index 6ee60b5018..9b3b182dd9 100644 --- 
a/src/Kestrel.Core/Internal/Infrastructure/HttpUtilities.cs +++ b/src/Kestrel.Core/Internal/Infrastructure/HttpUtilities.cs @@ -29,6 +29,8 @@ namespace Microsoft.AspNetCore.Server.Kestrel.Core.Internal.Infrastructure private const ulong _http10VersionLong = 3471766442030158920; // GetAsciiStringAsLong("HTTP/1.0"); const results in better codegen private const ulong _http11VersionLong = 3543824036068086856; // GetAsciiStringAsLong("HTTP/1.1"); const results in better codegen + private static readonly UTF8EncodingSealed HeaderValueEncoding = new UTF8EncodingSealed(); + [MethodImpl(MethodImplOptions.AggressiveInlining)] private static void SetKnownMethod(ulong mask, ulong knownMethodUlong, HttpMethod knownMethod, int length) { @@ -105,6 +107,41 @@ namespace Microsoft.AspNetCore.Server.Kestrel.Core.Internal.Infrastructure return asciiString; } + public static unsafe string GetAsciiOrUTF8StringNonNullCharacters(this Span span) + { + if (span.IsEmpty) + { + return string.Empty; + } + + var resultString = new string('\0', span.Length); + + fixed (char* output = resultString) + fixed (byte* buffer = &MemoryMarshal.GetReference(span)) + { + // ASCII fast path: TryGetAsciiString returns false when it cannot accept the input as ASCII + // (presumably a 0 byte or a byte above 0x7F - confirm in StringUtilities); the fallback below handles that case + if (!StringUtilities.TryGetAsciiString(buffer, output, span.Length)) + { + // null characters are always invalid, so reject them before retrying the input as strict UTF-8 + if (span.IndexOf((byte)0) != -1) + { + throw new InvalidOperationException(); + } + + try + { + resultString = HeaderValueEncoding.GetString(buffer, span.Length); + } + catch (DecoderFallbackException) + { + throw new InvalidOperationException(); + } + } + } + return resultString; + } + public static string GetAsciiStringEscaped(this Span span, int maxChars) { var sb = new StringBuilder(); @@ -505,5 +542,13 @@ namespace Microsoft.AspNetCore.Server.Kestrel.Core.Internal.Infrastructure // Check if less than 6 representing chars 'a' - 'f' || (uint)((ch | 32) - 'a') < 6u; } + + // Allow for de-virtualization (see
https://github.com/dotnet/coreclr/pull/9230) + private sealed class UTF8EncodingSealed : UTF8Encoding + { + public UTF8EncodingSealed() : base(encoderShouldEmitUTF8Identifier: false, throwOnInvalidBytes: true) { } + + public override byte[] GetPreamble() => Array.Empty(); + } } } diff --git a/test/Kestrel.Core.Tests/Http1ConnectionTests.cs b/test/Kestrel.Core.Tests/Http1ConnectionTests.cs index 19e17d4dc3..9dd6c5e488 100644 --- a/test/Kestrel.Core.Tests/Http1ConnectionTests.cs +++ b/test/Kestrel.Core.Tests/Http1ConnectionTests.cs @@ -81,6 +81,41 @@ namespace Microsoft.AspNetCore.Server.Kestrel.Core.Tests _pipelineFactory.Dispose(); } + [Fact] + public async Task TakeMessageHeadersSucceedsWhenHeaderValueContainsUTF8() + { + var headerName = "Header"; + var headerValueBytes = new byte[] { 0x46, 0x72, 0x61, 0x6e, 0xc3, 0xa7, 0x6f, 0x69, 0x73 }; + var headerValue = Encoding.UTF8.GetString(headerValueBytes); + _http1Connection.Reset(); + + await _application.Output.WriteAsync(Encoding.UTF8.GetBytes($"{headerName}: ")); + await _application.Output.WriteAsync(headerValueBytes); + await _application.Output.WriteAsync(Encoding.UTF8.GetBytes("\r\n\r\n")); + var readableBuffer = (await _transport.Input.ReadAsync()).Buffer; + + _http1Connection.TakeMessageHeaders(readableBuffer, out _consumed, out _examined); + _transport.Input.AdvanceTo(_consumed, _examined); + + Assert.Equal(headerValue, _http1Connection.RequestHeaders[headerName]); + } + + [Fact] + public async Task TakeMessageHeadersThrowsWhenHeaderValueContainsExtendedASCII() + { + var extendedAsciiEncoding = Encoding.GetEncoding("ISO-8859-1"); + var headerName = "Header"; + var headerValueBytes = new byte[] { 0x46, 0x72, 0x61, 0x6e, 0xe7, 0x6f, 0x69, 0x73 }; + _http1Connection.Reset(); + + await _application.Output.WriteAsync(extendedAsciiEncoding.GetBytes($"{headerName}: ")); + await _application.Output.WriteAsync(headerValueBytes); + await _application.Output.WriteAsync(extendedAsciiEncoding.GetBytes("\r\n\r\n")); + 
var readableBuffer = (await _transport.Input.ReadAsync()).Buffer; + + var exception = Assert.Throws(() => _http1Connection.TakeMessageHeaders(readableBuffer, out _consumed, out _examined)); + } + [Fact] public async Task TakeMessageHeadersThrowsWhenHeadersExceedTotalSizeLimit() { diff --git a/test/Kestrel.Core.Tests/UTF8Decoding.cs b/test/Kestrel.Core.Tests/UTF8Decoding.cs new file mode 100644 index 0000000000..532e13781d --- /dev/null +++ b/test/Kestrel.Core.Tests/UTF8Decoding.cs @@ -0,0 +1,43 @@ +// Copyright (c) .NET Foundation. All rights reserved. +// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. + +using System; +using System.Linq; +using System.Numerics; +using Microsoft.AspNetCore.Server.Kestrel.Core.Internal.Infrastructure; +using Xunit; + +namespace Microsoft.AspNetCore.Server.Kestrel.Core.Tests +{ + public class UTF8DecodingTests + { + [Theory] + [InlineData(new byte[] { 0x01 })] // 1 byte: U+0001 control character, the lowest value accepted because 0x00 is rejected + [InlineData(new byte[] { 0xc2, 0xa0})] // 2 bytes: U+00A0 non-breaking space, a code point outside ASCII encoded as two bytes + [InlineData(new byte[] { 0xef, 0xbf, 0xbd })] // 3 bytes: U+FFFD replacement character, a three-byte sequence that decodes to a single char + private void FullUTF8RangeSupported(byte[] encodedBytes) + { + var s = encodedBytes.AsSpan().GetAsciiOrUTF8StringNonNullCharacters(); + + Assert.Equal(1, s.Length); + } + + [Theory] + [InlineData(new byte[] { 0x00 })] // We reject the null character + [InlineData(new byte[] { 0x80 })] // 0x80 is the first Extended ASCII byte; on its own it is a continuation byte and not valid UTF-8 + [InlineData(new byte[] { 0x20, 0xac })] // UTF-16 (big-endian) bytes of U+20AC; 0xac has no UTF-8 lead byte, so this is not valid UTF-8 + private void ExceptionThrownForZeroOrNonAscii(byte[] bytes) + { + for (var length = bytes.Length; length < Vector<byte>.Count * 4 + bytes.Length; length++) + { + for (var position = 0; position
<= length - bytes.Length; position++) + { + var byteRange = Enumerable.Range(1, length).Select(x => (byte)((x - 1) % 0x7f + 1)).ToArray(); // filler cycles through 0x01-0x7f so only the injected bytes can make the input invalid + Array.Copy(bytes, 0, byteRange, position, bytes.Length); + + Assert.Throws<InvalidOperationException>(() => byteRange.AsSpan().GetAsciiOrUTF8StringNonNullCharacters()); + } + } + } + } +} diff --git a/test/Kestrel.InMemory.FunctionalTests/Http2/Http2TestBase.cs b/test/Kestrel.InMemory.FunctionalTests/Http2/Http2TestBase.cs index 7449a22f2f..92e4921508 100644 --- a/test/Kestrel.InMemory.FunctionalTests/Http2/Http2TestBase.cs +++ b/test/Kestrel.InMemory.FunctionalTests/Http2/Http2TestBase.cs @@ -304,7 +304,7 @@ namespace Microsoft.AspNetCore.Server.Kestrel.Core.Tests void IHttpHeadersHandler.OnHeader(Span name, Span value) { - _decodedHeaders[name.GetAsciiStringNonNullCharacters()] = value.GetAsciiStringNonNullCharacters(); + _decodedHeaders[name.GetAsciiStringNonNullCharacters()] = value.GetAsciiOrUTF8StringNonNullCharacters(); } protected async Task InitializeConnectionAsync(RequestDelegate application, int expectedSettingsLegnth = 6)