Allow UTF8 encoded header values

This commit is contained in:
John Luo 2018-08-27 02:14:24 -07:00
parent 80b30b21ee
commit 153020ef8a
7 changed files with 207 additions and 625 deletions

View File

@ -1,623 +0,0 @@
// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
using BenchmarkDotNet.Attributes;
using System;
using System.Collections.Generic;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Text;
namespace Microsoft.AspNetCore.Server.Kestrel.Performance
{
public class AsciiBytesToStringBenchmark
{
// Number of conversions each benchmark method performs per invocation;
// the same constant is passed to BenchmarkDotNet as OperationsPerInvoke.
private const int Iterations = 100;
// Input bytes for the selected header sample; assigned in Setup().
private byte[] _asciiBytes;
// 1024-char scratch string that the benchmarks overwrite in place through pinned pointers.
private string _asciiString = new string('\0', 1024);
/// <summary>
/// Selects which sample header value <see cref="Setup"/> loads into the input buffer.
/// </summary>
[Params(
BenchmarkTypes.KeepAlive,
BenchmarkTypes.Accept,
BenchmarkTypes.UserAgent,
BenchmarkTypes.Cookie
)]
public BenchmarkTypes Type { get; set; }
/// <summary>
/// Loads the ASCII bytes of the header sample selected by <see cref="Type"/> and then
/// runs <see cref="Verify"/> so that every conversion routine is checked before measuring.
/// </summary>
[GlobalSetup]
public void Setup()
{
    string sample = null;

    switch (Type)
    {
        case BenchmarkTypes.KeepAlive:
            sample = "keep-alive";
            break;
        case BenchmarkTypes.Accept:
            sample = "text/plain,text/html;q=0.9,application/xhtml+xml;q=0.9,application/xml;q=0.8,*/*;q=0.7";
            break;
        case BenchmarkTypes.UserAgent:
            sample = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36";
            break;
        case BenchmarkTypes.Cookie:
            sample = "prov=20629ccd-8b0f-e8ef-2935-cd26609fc0bc; __qca=P0-1591065732-1479167353442; _ga=GA1.2.1298898376.1479167354; _gat=1; sgt=id=9519gfde_3347_4762_8762_df51458c8ec2; acct=t=why-is-%e0%a5%a7%e0%a5%a8%e0%a5%a9-numeric&s=why-is-%e0%a5%a7%e0%a5%a8%e0%a5%a9-numeric";
            break;
    }

    // An unrecognised Type leaves the buffer untouched, just like a switch with no matching case.
    if (sample != null)
    {
        _asciiBytes = Encoding.ASCII.GetBytes(sample);
    }

    Verify();
}
/// <summary>
/// Reference benchmark: decodes the input bytes into the scratch string with
/// <see cref="Encoding.ASCII"/>, <c>Iterations</c> times per invocation.
/// </summary>
/// <returns>The scratch string that was written to.</returns>
[Benchmark(OperationsPerInvoke = Iterations)]
public unsafe string EncodingAsciiGetChars()
{
    var remaining = Iterations;
    while (remaining-- > 0)
    {
        // Pinning stays inside the loop so that every operation pays for it.
        fixed (byte* source = &_asciiBytes[0])
        fixed (char* destination = _asciiString)
        {
            var length = _asciiBytes.Length;
            Encoding.ASCII.GetChars(source, length, destination, length);
        }
    }

    return _asciiString;
}
/// <summary>
/// Baseline benchmark: converts the input bytes with the scalar, hand-unrolled
/// <see cref="TryGetAsciiString"/>, <c>Iterations</c> times per invocation.
/// </summary>
/// <returns>The input byte array.</returns>
[Benchmark(Baseline = true, OperationsPerInvoke = Iterations)]
public unsafe byte[] KestrelBytesToString()
{
    var remaining = Iterations;
    while (remaining-- > 0)
    {
        // Pinning stays inside the loop so that every operation pays for it.
        fixed (byte* source = &_asciiBytes[0])
        fixed (char* destination = _asciiString)
        {
            TryGetAsciiString(source, destination, _asciiBytes.Length);
        }
    }

    return _asciiBytes;
}
/// <summary>
/// Benchmarks <see cref="TryGetAsciiStringVectorCheck"/> (vectorized validation, manual
/// widening), <c>Iterations</c> times per invocation.
/// </summary>
/// <returns>The input byte array.</returns>
[Benchmark(OperationsPerInvoke = Iterations)]
public unsafe byte[] AsciiBytesToStringVectorCheck()
{
    var remaining = Iterations;
    while (remaining-- > 0)
    {
        // Pinning stays inside the loop so that every operation pays for it.
        fixed (byte* source = &_asciiBytes[0])
        fixed (char* destination = _asciiString)
        {
            TryGetAsciiStringVectorCheck(source, destination, _asciiBytes.Length);
        }
    }

    return _asciiBytes;
}
/// <summary>
/// Benchmarks <see cref="TryGetAsciiStringVectorWiden"/> (vectorized validation and
/// Vector.Widen), <c>Iterations</c> times per invocation.
/// </summary>
/// <returns>The input byte array.</returns>
[Benchmark(OperationsPerInvoke = Iterations)]
public unsafe byte[] AsciiBytesToStringVectorWiden()
{
    // Widen Acceleration is post netcoreapp2.0
    var remaining = Iterations;
    while (remaining-- > 0)
    {
        // Pinning stays inside the loop so that every operation pays for it.
        fixed (byte* source = &_asciiBytes[0])
        fixed (char* destination = _asciiString)
        {
            TryGetAsciiStringVectorWiden(source, destination, _asciiBytes.Length);
        }
    }

    return _asciiBytes;
}
/// <summary>
/// Benchmarks the span-based <see cref="TryGetAsciiStringWidenSpan"/>,
/// <c>Iterations</c> times per invocation.
/// </summary>
/// <returns>The input byte array.</returns>
[Benchmark(OperationsPerInvoke = Iterations)]
public unsafe byte[] AsciiBytesToStringSpanWiden()
{
    // Widen Acceleration is post netcoreapp2.0
    var remaining = Iterations;
    while (remaining-- > 0)
    {
        // Only the destination needs pinning; the source array converts to a span directly.
        fixed (char* destination = _asciiString)
        {
            var output = new Span<char>(destination, _asciiString.Length);
            TryGetAsciiStringWidenSpan(_asciiBytes, output);
        }
    }

    return _asciiBytes;
}
/// <summary>
/// Widens every byte of <paramref name="input"/> into a char of <paramref name="output"/>
/// and reports whether all bytes passed the CheckBytesInAsciiRange checks.
/// </summary>
/// <param name="input">Bytes to convert.</param>
/// <param name="output">Destination chars; assumes it is at least as long as <paramref name="input"/> (nothing here checks that up front — TODO confirm callers).</param>
/// <returns><c>true</c> when every range check succeeded; conversion is not stopped early on failure.</returns>
public static bool TryGetAsciiStringWidenSpan(ReadOnlySpan<byte> input, Span<char> output)
{
// Start as valid
var isValid = true;
do
{
// If Vector not-accelerated or remaining less than vector size
if (!Vector.IsHardwareAccelerated || input.Length < Vector<sbyte>.Count)
{
if (IntPtr.Size == 8) // Use Intrinsic switch for branch elimination
{
// 64-bit: Loop longs by default
while ((uint)sizeof(long) <= (uint)input.Length)
{
// Validate 8 bytes at once, then widen them one by one.
isValid &= CheckBytesInAsciiRange(MemoryMarshal.Cast<byte, long>(input)[0]);
output[0] = (char)input[0];
output[1] = (char)input[1];
output[2] = (char)input[2];
output[3] = (char)input[3];
output[4] = (char)input[4];
output[5] = (char)input[5];
output[6] = (char)input[6];
output[7] = (char)input[7];
input = input.Slice(sizeof(long));
output = output.Slice(sizeof(long));
}
// Fewer than 8 bytes remain here, so at most one 4-byte step is needed.
if ((uint)sizeof(int) <= (uint)input.Length)
{
isValid &= CheckBytesInAsciiRange(MemoryMarshal.Cast<byte, int>(input)[0]);
output[0] = (char)input[0];
output[1] = (char)input[1];
output[2] = (char)input[2];
output[3] = (char)input[3];
input = input.Slice(sizeof(int));
output = output.Slice(sizeof(int));
}
}
else
{
// 32-bit: Loop ints by default
while ((uint)sizeof(int) <= (uint)input.Length)
{
isValid &= CheckBytesInAsciiRange(MemoryMarshal.Cast<byte, int>(input)[0]);
output[0] = (char)input[0];
output[1] = (char)input[1];
output[2] = (char)input[2];
output[3] = (char)input[3];
input = input.Slice(sizeof(int));
output = output.Slice(sizeof(int));
}
}
// Fewer than 4 bytes remain: one optional 2-byte step, then one optional single byte.
if ((uint)sizeof(short) <= (uint)input.Length)
{
isValid &= CheckBytesInAsciiRange(MemoryMarshal.Cast<byte, short>(input)[0]);
output[0] = (char)input[0];
output[1] = (char)input[1];
input = input.Slice(sizeof(short));
output = output.Slice(sizeof(short));
}
if ((uint)sizeof(byte) <= (uint)input.Length)
{
isValid &= CheckBytesInAsciiRange((sbyte)input[0]);
output[0] = (char)input[0];
}
// The scalar path always consumes the rest of the input, so it returns directly.
return isValid;
}
// do/while as entry condition already checked
do
{
var vector = MemoryMarshal.Cast<byte, Vector<sbyte>>(input)[0];
isValid &= CheckBytesInAsciiRange(vector);
// One vector of sbytes widens into two vectors of shorts, written straight into the output chars.
Vector.Widen(
vector,
out MemoryMarshal.Cast<char, Vector<short>>(output)[0],
out MemoryMarshal.Cast<char, Vector<short>>(output)[1]);
input = input.Slice(Vector<sbyte>.Count);
output = output.Slice(Vector<sbyte>.Count);
} while (input.Length >= Vector<sbyte>.Count);
// Vector path done, loop back to do non-Vector
// If it is an exact multiple of the vector size, bail now
} while (input.Length > 0);
return isValid;
}
/// <summary>
/// Pointer-based conversion: widens <paramref name="count"/> bytes from <paramref name="input"/>
/// into chars at <paramref name="output"/>, using Vector.Widen for full vectors and
/// long/int/short/byte steps for the remainder.
/// </summary>
/// <param name="input">First byte to convert.</param>
/// <param name="output">First destination char; no bounds are checked here, so the caller must provide room for <paramref name="count"/> chars.</param>
/// <param name="count">Number of bytes to convert.</param>
/// <returns><c>true</c> when every CheckBytesInAsciiRange check succeeded; conversion is not stopped early on failure.</returns>
public static unsafe bool TryGetAsciiStringVectorWiden(byte* input, char* output, int count)
{
// Calculate end position
var end = input + count;
// Start as valid
var isValid = true;
do
{
// If Vector not-accelerated or remaining less than vector size
if (!Vector.IsHardwareAccelerated || input > end - Vector<sbyte>.Count)
{
if (IntPtr.Size == 8) // Use Intrinsic switch for branch elimination
{
// 64-bit: Loop longs by default
while (input <= end - sizeof(long))
{
// Validate 8 bytes at once, then widen them one by one.
isValid &= CheckBytesInAsciiRange(((long*)input)[0]);
output[0] = (char)input[0];
output[1] = (char)input[1];
output[2] = (char)input[2];
output[3] = (char)input[3];
output[4] = (char)input[4];
output[5] = (char)input[5];
output[6] = (char)input[6];
output[7] = (char)input[7];
input += sizeof(long);
output += sizeof(long);
}
// Fewer than 8 bytes remain here, so at most one 4-byte step is needed.
if (input <= end - sizeof(int))
{
isValid &= CheckBytesInAsciiRange(((int*)input)[0]);
output[0] = (char)input[0];
output[1] = (char)input[1];
output[2] = (char)input[2];
output[3] = (char)input[3];
input += sizeof(int);
output += sizeof(int);
}
}
else
{
// 32-bit: Loop ints by default
while (input <= end - sizeof(int))
{
isValid &= CheckBytesInAsciiRange(((int*)input)[0]);
output[0] = (char)input[0];
output[1] = (char)input[1];
output[2] = (char)input[2];
output[3] = (char)input[3];
input += sizeof(int);
output += sizeof(int);
}
}
// Fewer than 4 bytes remain: one optional 2-byte step, then one optional single byte.
if (input <= end - sizeof(short))
{
isValid &= CheckBytesInAsciiRange(((short*)input)[0]);
output[0] = (char)input[0];
output[1] = (char)input[1];
input += sizeof(short);
output += sizeof(short);
}
if (input < end)
{
isValid &= CheckBytesInAsciiRange(((sbyte*)input)[0]);
output[0] = (char)input[0];
}
// The scalar path always consumes the rest of the input, so it returns directly.
return isValid;
}
// do/while as entry condition already checked
do
{
var vector = Unsafe.AsRef<Vector<sbyte>>(input);
isValid &= CheckBytesInAsciiRange(vector);
// One vector of sbytes widens into two vectors of shorts; the second one starts
// Vector<short>.Count chars after the first.
Vector.Widen(
vector,
out Unsafe.AsRef<Vector<short>>(output),
out Unsafe.AsRef<Vector<short>>(output + Vector<short>.Count));
input += Vector<sbyte>.Count;
output += Vector<sbyte>.Count;
} while (input <= end - Vector<sbyte>.Count);
// Vector path done, loop back to do non-Vector
// If it is an exact multiple of the vector size, bail now
} while (input < end);
return isValid;
}
/// <summary>
/// Pointer-based conversion: validates full vectors with a single vector comparison but
/// widens them with an unrolled byte-wise copy, and handles the remainder with
/// long/int/short/byte steps.
/// </summary>
/// <param name="input">First byte to convert.</param>
/// <param name="output">First destination char; no bounds are checked here, so the caller must provide room for <paramref name="count"/> chars.</param>
/// <param name="count">Number of bytes to convert.</param>
/// <returns><c>true</c> when every CheckBytesInAsciiRange check succeeded; conversion is not stopped early on failure.</returns>
public static unsafe bool TryGetAsciiStringVectorCheck(byte* input, char* output, int count)
{
// Calculate end position
var end = input + count;
// Start as valid
var isValid = true;
do
{
// If Vector not-accelerated or remaining less than vector size
if (!Vector.IsHardwareAccelerated || input > end - Vector<sbyte>.Count)
{
if (IntPtr.Size == 8) // Use Intrinsic switch for branch elimination
{
// 64-bit: Loop longs by default
while (input <= end - sizeof(long))
{
// Validate 8 bytes at once, then widen them one by one.
isValid &= CheckBytesInAsciiRange(((long*)input)[0]);
output[0] = (char)input[0];
output[1] = (char)input[1];
output[2] = (char)input[2];
output[3] = (char)input[3];
output[4] = (char)input[4];
output[5] = (char)input[5];
output[6] = (char)input[6];
output[7] = (char)input[7];
input += sizeof(long);
output += sizeof(long);
}
// Fewer than 8 bytes remain here, so at most one 4-byte step is needed.
if (input <= end - sizeof(int))
{
isValid &= CheckBytesInAsciiRange(((int*)input)[0]);
output[0] = (char)input[0];
output[1] = (char)input[1];
output[2] = (char)input[2];
output[3] = (char)input[3];
input += sizeof(int);
output += sizeof(int);
}
}
else
{
// 32-bit: Loop ints by default
while (input <= end - sizeof(int))
{
isValid &= CheckBytesInAsciiRange(((int*)input)[0]);
output[0] = (char)input[0];
output[1] = (char)input[1];
output[2] = (char)input[2];
output[3] = (char)input[3];
input += sizeof(int);
output += sizeof(int);
}
}
// Fewer than 4 bytes remain: one optional 2-byte step, then one optional single byte.
if (input <= end - sizeof(short))
{
isValid &= CheckBytesInAsciiRange(((short*)input)[0]);
output[0] = (char)input[0];
output[1] = (char)input[1];
input += sizeof(short);
output += sizeof(short);
}
if (input < end)
{
isValid &= CheckBytesInAsciiRange(((sbyte*)input)[0]);
output[0] = (char)input[0];
}
// The scalar path always consumes the rest of the input, so it returns directly.
return isValid;
}
// do/while as entry condition already checked
do
{
isValid &= CheckBytesInAsciiRange(Unsafe.AsRef<Vector<sbyte>>(input));
// Vector.Widen is only netcoreapp2.1+ so let's do this manually
// i counts the bytes copied for the vector validated above.
var i = 0;
do
{
// Vectors are min 16 byte, so lets do 16 byte loops
i += 16;
// Unrolled byte-wise widen
output[0] = (char)input[0];
output[1] = (char)input[1];
output[2] = (char)input[2];
output[3] = (char)input[3];
output[4] = (char)input[4];
output[5] = (char)input[5];
output[6] = (char)input[6];
output[7] = (char)input[7];
output[8] = (char)input[8];
output[9] = (char)input[9];
output[10] = (char)input[10];
output[11] = (char)input[11];
output[12] = (char)input[12];
output[13] = (char)input[13];
output[14] = (char)input[14];
output[15] = (char)input[15];
input += 16;
output += 16;
} while (i < Vector<sbyte>.Count);
} while (input <= end - Vector<sbyte>.Count);
// Vector path done, loop back to do non-Vector
// If it is an exact multiple of the vector size, bail now
} while (input < end);
return isValid;
}
/// <summary>
/// Scalar, hand-unrolled conversion: copies <paramref name="count"/> bytes from
/// <paramref name="input"/> into chars at <paramref name="output"/> in steps of 12, then at
/// most one step of 6 and one of 4, then byte by byte.
/// </summary>
/// <param name="input">First byte to convert.</param>
/// <param name="output">First destination char; no bounds are checked here, so the caller must provide room for <paramref name="count"/> chars.</param>
/// <param name="count">Number of bytes to convert.</param>
/// <returns><c>true</c> when every byte, read as a signed byte, was greater than zero; copying continues even after a failure.</returns>
public static unsafe bool TryGetAsciiString(byte* input, char* output, int count)
{
var i = 0;
// Reading the bytes as signed makes "> 0" reject both 0 and everything >= 0x80.
// NOTE(review): the chars written below are cast from the signed value, so bytes >= 0x80
// are sign-extended rather than zero-extended as in the other routines; this only affects
// output produced when the method returns false.
sbyte* signedInput = (sbyte*)input;
bool isValid = true;
// Main loop: 12 bytes per pass.
while (i < count - 11)
{
isValid = isValid && *signedInput > 0 && *(signedInput + 1) > 0 && *(signedInput + 2) > 0 &&
*(signedInput + 3) > 0 && *(signedInput + 4) > 0 && *(signedInput + 5) > 0 && *(signedInput + 6) > 0 &&
*(signedInput + 7) > 0 && *(signedInput + 8) > 0 && *(signedInput + 9) > 0 && *(signedInput + 10) > 0 &&
*(signedInput + 11) > 0;
i += 12;
*(output) = (char)*(signedInput);
*(output + 1) = (char)*(signedInput + 1);
*(output + 2) = (char)*(signedInput + 2);
*(output + 3) = (char)*(signedInput + 3);
*(output + 4) = (char)*(signedInput + 4);
*(output + 5) = (char)*(signedInput + 5);
*(output + 6) = (char)*(signedInput + 6);
*(output + 7) = (char)*(signedInput + 7);
*(output + 8) = (char)*(signedInput + 8);
*(output + 9) = (char)*(signedInput + 9);
*(output + 10) = (char)*(signedInput + 10);
*(output + 11) = (char)*(signedInput + 11);
output += 12;
signedInput += 12;
}
// Fewer than 12 bytes remain: one optional 6-byte step.
if (i < count - 5)
{
isValid = isValid && *signedInput > 0 && *(signedInput + 1) > 0 && *(signedInput + 2) > 0 &&
*(signedInput + 3) > 0 && *(signedInput + 4) > 0 && *(signedInput + 5) > 0;
i += 6;
*(output) = (char)*(signedInput);
*(output + 1) = (char)*(signedInput + 1);
*(output + 2) = (char)*(signedInput + 2);
*(output + 3) = (char)*(signedInput + 3);
*(output + 4) = (char)*(signedInput + 4);
*(output + 5) = (char)*(signedInput + 5);
output += 6;
signedInput += 6;
}
// Fewer than 6 bytes remain: one optional 4-byte step.
if (i < count - 3)
{
isValid = isValid && *signedInput > 0 && *(signedInput + 1) > 0 && *(signedInput + 2) > 0 &&
*(signedInput + 3) > 0;
i += 4;
*(output) = (char)*(signedInput);
*(output + 1) = (char)*(signedInput + 1);
*(output + 2) = (char)*(signedInput + 2);
*(output + 3) = (char)*(signedInput + 3);
output += 4;
signedInput += 4;
}
// Whatever is left (at most 3 bytes) is handled one byte at a time.
while (i < count)
{
isValid = isValid && *signedInput > 0;
i++;
*output = (char)*signedInput;
output++;
signedInput++;
}
return isValid;
}
/// <summary>
/// Vectorized range check: every lane, read as a signed byte, must be greater than zero,
/// which holds exactly for the byte values 1-127.
/// </summary>
private static bool CheckBytesInAsciiRange(Vector<sbyte> check)
    => Vector.GreaterThanAll(check, Vector<sbyte>.Zero);
// Checks eight packed bytes at once: each must be != 0 and <= 127.
// Subtracting 1 from every byte turns a 0 byte into one with its high bit set;
// OR-ing with the original value adds the high bits of any byte > 127;
// if no high bit survives the mask, all eight bytes are in range.
[MethodImpl(MethodImplOptions.AggressiveInlining)] // Needs a push
private static bool CheckBytesInAsciiRange(long check)
{
    const long OneInEveryByte = 0x0101010101010101L;
    const long HighBits = unchecked((long)0x8080808080808080L);

    var decremented = check - OneInEveryByte;
    var flagged = (decremented | check) & HighBits;
    return flagged == 0;
}
// Four-byte variant of the packed range check: each byte must be != 0 and <= 127.
private static bool CheckBytesInAsciiRange(int check)
{
    const int OneInEveryByte = 0x01010101;
    const int HighBits = unchecked((int)0x80808080);

    var decremented = check - OneInEveryByte;
    var flagged = (decremented | check) & HighBits;
    return flagged == 0;
}
// Two-byte variant of the packed range check: each byte must be != 0 and <= 127.
private static bool CheckBytesInAsciiRange(short check)
{
    const short HighBits = unchecked((short)0x8080);

    // The subtraction is done in int and truncated back to 16 bits before the test.
    var decremented = (short)(check - 0x0101);
    var flagged = (decremented | check) & HighBits;
    return flagged == 0;
}
// Single-byte variant: as a signed byte, only the values 1-127 are greater than zero.
private static bool CheckBytesInAsciiRange(sbyte check)
{
    return 0 < check;
}
/// <summary>
/// Sanity check run from setup: every conversion routine must write the same characters
/// as Encoding.ASCII and must leave the rest of the scratch string untouched.
/// </summary>
private void Verify()
{
    // Reference output, taken from the Encoding.ASCII based routine.
    var expected = EncodingAsciiGetChars().Substring(0, _asciiBytes.Length);

    var converters = new Action[]
    {
        () => EncodingAsciiGetChars(),
        () => KestrelBytesToString(),
        () => AsciiBytesToStringVectorCheck(),
        () => AsciiBytesToStringVectorWiden(),
        () => AsciiBytesToStringSpanWiden(),
    };

    // Two different fill characters are used so that writes past the end cannot go unnoticed.
    var fillers = new[] { '\0', ' ' };

    foreach (var convert in converters)
    {
        foreach (var filler in fillers)
        {
            BlankString(filler);
            convert();
            VerifyString(expected, filler);
        }
    }
}
/// <summary>
/// Overwrites every character of the scratch string in place with <paramref name="ch"/>.
/// </summary>
private unsafe void BlankString(char ch)
{
    fixed (char* start = _asciiString)
    {
        var end = start + _asciiString.Length;
        for (var cursor = start; cursor < end; cursor++)
        {
            *cursor = ch;
        }
    }
}
/// <summary>
/// Checks that the scratch string starts with <paramref name="verification"/> and that
/// every remaining character still equals the fill character <paramref name="ch"/>.
/// </summary>
/// <exception cref="Exception">Thrown at the first character that differs.</exception>
private unsafe void VerifyString(string verification, char ch)
{
    fixed (char* actual = _asciiString)
    {
        var index = 0;

        // Converted prefix: must match the reference text character by character.
        while (index < verification.Length)
        {
            var seen = actual[index];
            var wanted = verification[index];
            if (seen != wanted)
            {
                throw new Exception($"Verify failed, saw {(int)seen} expected {(int)wanted} at position {index}");
            }
            index++;
        }

        // Tail: must still hold the fill character, i.e. nothing was written past the input length.
        while (index < _asciiString.Length)
        {
            var seen = actual[index];
            if (seen != ch)
            {
                throw new Exception($"Verify failed, saw {(int)seen} expected {(int)ch} at position {index}");
            }
            index++;
        }
    }
}
/// <summary>
/// Header samples the benchmark can run against; Setup() maps each value to its sample text.
/// </summary>
public enum BenchmarkTypes
{
// "keep-alive"
KeepAlive,
// An Accept header value listing several media types with q-values
Accept,
// A browser User-Agent string
UserAgent,
// A Cookie header value containing percent-encoded sequences
Cookie,
}
}
}

View File

@ -0,0 +1,82 @@
// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
using BenchmarkDotNet.Attributes;
using Microsoft.AspNetCore.Server.Kestrel.Core.Internal.Infrastructure;
namespace Microsoft.AspNetCore.Server.Kestrel.Performance
{
public class BytesToStringBenchmark
{
// Number of conversions each benchmark method performs per invocation;
// the same constant is passed to BenchmarkDotNet as OperationsPerInvoke.
private const int Iterations = 50;
// Pure-ASCII bytes of the selected header sample; assigned in Setup().
private byte[] _asciiBytes;
// UTF-8 bytes of the accented variant of the same sample; assigned in Setup().
private byte[] _utf8Bytes;
/// <summary>
/// Selects which sample header value <see cref="Setup"/> loads into the input buffers.
/// </summary>
[Params(
BenchmarkTypes.KeepAlive,
BenchmarkTypes.Accept,
BenchmarkTypes.UserAgent,
BenchmarkTypes.Cookie
)]
public BenchmarkTypes Type { get; set; }
/// <summary>
/// Loads the two input buffers for the sample selected by <see cref="Type"/>: the ASCII bytes
/// of the header value and a UTF-8 variant of the same text. The comment above each array
/// shows the text it encodes; in the UTF-8 variants the accented letters appear as the
/// two-byte sequences 0xc3 0xa9 (é) and 0xc3 0xa1 (á).
/// An unlisted <see cref="Type"/> value leaves both buffers unassigned.
/// </summary>
[GlobalSetup]
public void Setup()
{
switch (Type)
{
case BenchmarkTypes.KeepAlive:
// keep-alive
_asciiBytes = new byte[] { 0x6b, 0x65, 0x65, 0x70, 0x2d, 0x61, 0x6c, 0x69, 0x76, 0x65 };
// kéép-álivé
_utf8Bytes = new byte[] { 0x6b, 0xc3, 0xa9, 0xc3, 0xa9, 0x70, 0x2d, 0xc3, 0xa1, 0x6c, 0x69, 0x76, 0xc3, 0xa9 };
break;
case BenchmarkTypes.Accept:
// text/plain,text/html;q=0.9,application/xhtml+xml;q=0.9,application/xml;q=0.8,*/*;q=0.7
_asciiBytes = new byte[] { 0x74, 0x65, 0x78, 0x74, 0x2f, 0x70, 0x6c, 0x61, 0x69, 0x6e, 0x2c, 0x74, 0x65, 0x78, 0x74, 0x2f, 0x68, 0x74, 0x6d, 0x6c, 0x3b, 0x71, 0x3d, 0x30, 0x2e, 0x39, 0x2c, 0x61, 0x70, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x2f, 0x78, 0x68, 0x74, 0x6d, 0x6c, 0x2b, 0x78, 0x6d, 0x6c, 0x3b, 0x71, 0x3d, 0x30, 0x2e, 0x39, 0x2c, 0x61, 0x70, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x2f, 0x78, 0x6d, 0x6c, 0x3b, 0x71, 0x3d, 0x30, 0x2e, 0x38, 0x2c, 0x2a, 0x2f, 0x2a, 0x3b, 0x71, 0x3d, 0x30, 0x2e, 0x37 };
// téxt/pláin,téxt/html;q=0.9,ápplicátion/xhtml+xml;q=0.9,ápplicátion/xml;q=0.8,*/*;q=0.7
_utf8Bytes = new byte[] { 0x74, 0xc3, 0xa9, 0x78, 0x74, 0x2f, 0x70, 0x6c, 0xc3, 0xa1, 0x69, 0x6e, 0x2c, 0x74, 0xc3, 0xa9, 0x78, 0x74, 0x2f, 0x68, 0x74, 0x6d, 0x6c, 0x3b, 0x71, 0x3d, 0x30, 0x2e, 0x39, 0x2c, 0xc3, 0xa1, 0x70, 0x70, 0x6c, 0x69, 0x63, 0xc3, 0xa1, 0x74, 0x69, 0x6f, 0x6e, 0x2f, 0x78, 0x68, 0x74, 0x6d, 0x6c, 0x2b, 0x78, 0x6d, 0x6c, 0x3b, 0x71, 0x3d, 0x30, 0x2e, 0x39, 0x2c, 0xc3, 0xa1, 0x70, 0x70, 0x6c, 0x69, 0x63, 0xc3, 0xa1, 0x74, 0x69, 0x6f, 0x6e, 0x2f, 0x78, 0x6d, 0x6c, 0x3b, 0x71, 0x3d, 0x30, 0x2e, 0x38, 0x2c, 0x2a, 0x2f, 0x2a, 0x3b, 0x71, 0x3d, 0x30, 0x2e, 0x37 };
break;
case BenchmarkTypes.UserAgent:
// Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36
_asciiBytes = new byte[] { 0x4d, 0x6f, 0x7a, 0x69, 0x6c, 0x6c, 0x61, 0x2f, 0x35, 0x2e, 0x30, 0x20, 0x28, 0x57, 0x69, 0x6e, 0x64, 0x6f, 0x77, 0x73, 0x20, 0x4e, 0x54, 0x20, 0x31, 0x30, 0x2e, 0x30, 0x3b, 0x20, 0x57, 0x4f, 0x57, 0x36, 0x34, 0x29, 0x20, 0x41, 0x70, 0x70, 0x6c, 0x65, 0x57, 0x65, 0x62, 0x4b, 0x69, 0x74, 0x2f, 0x35, 0x33, 0x37, 0x2e, 0x33, 0x36, 0x20, 0x28, 0x4b, 0x48, 0x54, 0x4d, 0x4c, 0x2c, 0x20, 0x6c, 0x69, 0x6b, 0x65, 0x20, 0x47, 0x65, 0x63, 0x6b, 0x6f, 0x29, 0x20, 0x43, 0x68, 0x72, 0x6f, 0x6d, 0x65, 0x2f, 0x35, 0x34, 0x2e, 0x30, 0x2e, 0x32, 0x38, 0x34, 0x30, 0x2e, 0x39, 0x39, 0x20, 0x53, 0x61, 0x66, 0x61, 0x72, 0x69, 0x2f, 0x35, 0x33, 0x37, 0x2e, 0x33, 0x36 };
// Mozillá/5.0 (Windows NT 10.0; WOW64) áppléWébKit/537.36 (KHTML, liké Gécko) Chromé/54.0.2840.99 Sáfári/537.36
_utf8Bytes = new byte[] { 0x4d, 0x6f, 0x7a, 0x69, 0x6c, 0x6c, 0xc3, 0xa1, 0x2f, 0x35, 0x2e, 0x30, 0x20, 0x28, 0x57, 0x69, 0x6e, 0x64, 0x6f, 0x77, 0x73, 0x20, 0x4e, 0x54, 0x20, 0x31, 0x30, 0x2e, 0x30, 0x3b, 0x20, 0x57, 0x4f, 0x57, 0x36, 0x34, 0x29, 0x20, 0xc3, 0xa1, 0x70, 0x70, 0x6c, 0xc3, 0xa9, 0x57, 0xc3, 0xa9, 0x62, 0x4b, 0x69, 0x74, 0x2f, 0x35, 0x33, 0x37, 0x2e, 0x33, 0x36, 0x20, 0x28, 0x4b, 0x48, 0x54, 0x4d, 0x4c, 0x2c, 0x20, 0x6c, 0x69, 0x6b, 0xc3, 0xa9, 0x20, 0x47, 0xc3, 0xa9, 0x63, 0x6b, 0x6f, 0x29, 0x20, 0x43, 0x68, 0x72, 0x6f, 0x6d, 0xc3, 0xa9, 0x2f, 0x35, 0x34, 0x2e, 0x30, 0x2e, 0x32, 0x38, 0x34, 0x30, 0x2e, 0x39, 0x39, 0x20, 0x53, 0xc3, 0xa1, 0x66, 0xc3, 0xa1, 0x72, 0x69, 0x2f, 0x35, 0x33, 0x37, 0x2e, 0x33, 0x36 };
break;
case BenchmarkTypes.Cookie:
// prov=20629ccd-8b0f-e8ef-2935-cd26609fc0bc; __qca=P0-1591065732-1479167353442; _ga=GA1.2.1298898376.1479167354; _gat=1; sgt=id=9519gfde_3347_4762_8762_df51458c8ec2; acct=t=why-is-%e0%a5%a7%e0%a5%a8%e0%a5%a9-numeric&s=why-is-%e0%a5%a7%e0%a5%a8%e0%a5%a9-numeric
_asciiBytes = new byte[] { 0x70, 0x72, 0x6f, 0x76, 0x3d, 0x32, 0x30, 0x36, 0x32, 0x39, 0x63, 0x63, 0x64, 0x2d, 0x38, 0x62, 0x30, 0x66, 0x2d, 0x65, 0x38, 0x65, 0x66, 0x2d, 0x32, 0x39, 0x33, 0x35, 0x2d, 0x63, 0x64, 0x32, 0x36, 0x36, 0x30, 0x39, 0x66, 0x63, 0x30, 0x62, 0x63, 0x3b, 0x20, 0x5f, 0x5f, 0x71, 0x63, 0x61, 0x3d, 0x50, 0x30, 0x2d, 0x31, 0x35, 0x39, 0x31, 0x30, 0x36, 0x35, 0x37, 0x33, 0x32, 0x2d, 0x31, 0x34, 0x37, 0x39, 0x31, 0x36, 0x37, 0x33, 0x35, 0x33, 0x34, 0x34, 0x32, 0x3b, 0x20, 0x5f, 0x67, 0x61, 0x3d, 0x47, 0x41, 0x31, 0x2e, 0x32, 0x2e, 0x31, 0x32, 0x39, 0x38, 0x38, 0x39, 0x38, 0x33, 0x37, 0x36, 0x2e, 0x31, 0x34, 0x37, 0x39, 0x31, 0x36, 0x37, 0x33, 0x35, 0x34, 0x3b, 0x20, 0x5f, 0x67, 0x61, 0x74, 0x3d, 0x31, 0x3b, 0x20, 0x73, 0x67, 0x74, 0x3d, 0x69, 0x64, 0x3d, 0x39, 0x35, 0x31, 0x39, 0x67, 0x66, 0x64, 0x65, 0x5f, 0x33, 0x33, 0x34, 0x37, 0x5f, 0x34, 0x37, 0x36, 0x32, 0x5f, 0x38, 0x37, 0x36, 0x32, 0x5f, 0x64, 0x66, 0x35, 0x31, 0x34, 0x35, 0x38, 0x63, 0x38, 0x65, 0x63, 0x32, 0x3b, 0x20, 0x61, 0x63, 0x63, 0x74, 0x3d, 0x74, 0x3d, 0x77, 0x68, 0x79, 0x2d, 0x69, 0x73, 0x2d, 0x25, 0x65, 0x30, 0x25, 0x61, 0x35, 0x25, 0x61, 0x37, 0x25, 0x65, 0x30, 0x25, 0x61, 0x35, 0x25, 0x61, 0x38, 0x25, 0x65, 0x30, 0x25, 0x61, 0x35, 0x25, 0x61, 0x39, 0x2d, 0x6e, 0x75, 0x6d, 0x65, 0x72, 0x69, 0x63, 0x26, 0x73, 0x3d, 0x77, 0x68, 0x79, 0x2d, 0x69, 0x73, 0x2d, 0x25, 0x65, 0x30, 0x25, 0x61, 0x35, 0x25, 0x61, 0x37, 0x25, 0x65, 0x30, 0x25, 0x61, 0x35, 0x25, 0x61, 0x38, 0x25, 0x65, 0x30, 0x25, 0x61, 0x35, 0x25, 0x61, 0x39, 0x2d, 0x6e, 0x75, 0x6d, 0x65, 0x72, 0x69, 0x63 };
// prov=20629ccd-8b0f-é8éf-2935-cd26609fc0bc; __qcá=P0-1591065732-1479167353442; _gá=Gá1.2.1298898376.1479167354; _gát=1; sgt=id=9519gfdé_3347_4762_8762_df51458c8éc2; ácct=t=why-is-%é0%á5%á7%é0%á5%á8%é0%á5%á9-numéric&s=why-is-%é0%á5%á7%é0%á5%á8%é0%á5%á9-numéric
_utf8Bytes = new byte[] { 0x70, 0x72, 0x6f, 0x76, 0x3d, 0x32, 0x30, 0x36, 0x32, 0x39, 0x63, 0x63, 0x64, 0x2d, 0x38, 0x62, 0x30, 0x66, 0x2d, 0xc3, 0xa9, 0x38, 0xc3, 0xa9, 0x66, 0x2d, 0x32, 0x39, 0x33, 0x35, 0x2d, 0x63, 0x64, 0x32, 0x36, 0x36, 0x30, 0x39, 0x66, 0x63, 0x30, 0x62, 0x63, 0x3b, 0x20, 0x5f, 0x5f, 0x71, 0x63, 0xc3, 0xa1, 0x3d, 0x50, 0x30, 0x2d, 0x31, 0x35, 0x39, 0x31, 0x30, 0x36, 0x35, 0x37, 0x33, 0x32, 0x2d, 0x31, 0x34, 0x37, 0x39, 0x31, 0x36, 0x37, 0x33, 0x35, 0x33, 0x34, 0x34, 0x32, 0x3b, 0x20, 0x5f, 0x67, 0xc3, 0xa1, 0x3d, 0x47, 0xc3, 0xa1, 0x31, 0x2e, 0x32, 0x2e, 0x31, 0x32, 0x39, 0x38, 0x38, 0x39, 0x38, 0x33, 0x37, 0x36, 0x2e, 0x31, 0x34, 0x37, 0x39, 0x31, 0x36, 0x37, 0x33, 0x35, 0x34, 0x3b, 0x20, 0x5f, 0x67, 0xc3, 0xa1, 0x74, 0x3d, 0x31, 0x3b, 0x20, 0x73, 0x67, 0x74, 0x3d, 0x69, 0x64, 0x3d, 0x39, 0x35, 0x31, 0x39, 0x67, 0x66, 0x64, 0xc3, 0xa9, 0x5f, 0x33, 0x33, 0x34, 0x37, 0x5f, 0x34, 0x37, 0x36, 0x32, 0x5f, 0x38, 0x37, 0x36, 0x32, 0x5f, 0x64, 0x66, 0x35, 0x31, 0x34, 0x35, 0x38, 0x63, 0x38, 0xc3, 0xa9, 0x63, 0x32, 0x3b, 0x20, 0xc3, 0xa1, 0x63, 0x63, 0x74, 0x3d, 0x74, 0x3d, 0x77, 0x68, 0x79, 0x2d, 0x69, 0x73, 0x2d, 0x25, 0xc3, 0xa9, 0x30, 0x25, 0xc3, 0xa1, 0x35, 0x25, 0xc3, 0xa1, 0x37, 0x25, 0xc3, 0xa9, 0x30, 0x25, 0xc3, 0xa1, 0x35, 0x25, 0xc3, 0xa1, 0x38, 0x25, 0xc3, 0xa9, 0x30, 0x25, 0xc3, 0xa1, 0x35, 0x25, 0xc3, 0xa1, 0x39, 0x2d, 0x6e, 0x75, 0x6d, 0xc3, 0xa9, 0x72, 0x69, 0x63, 0x26, 0x73, 0x3d, 0x77, 0x68, 0x79, 0x2d, 0x69, 0x73, 0x2d, 0x25, 0xc3, 0xa9, 0x30, 0x25, 0xc3, 0xa1, 0x35, 0x25, 0xc3, 0xa1, 0x37, 0x25, 0xc3, 0xa9, 0x30, 0x25, 0xc3, 0xa1, 0x35, 0x25, 0xc3, 0xa1, 0x38, 0x25, 0xc3, 0xa9, 0x30, 0x25, 0xc3, 0xa1, 0x35, 0x25, 0xc3, 0xa1, 0x39, 0x2d, 0x6e, 0x75, 0x6d, 0xc3, 0xa9, 0x72, 0x69, 0x63 };
break;
}
}
/// <summary>
/// Baseline benchmark: decodes the pure-ASCII sample with the ASCII-only helper,
/// <c>Iterations</c> times per invocation.
/// </summary>
[Benchmark(Baseline = true, OperationsPerInvoke = Iterations)]
public void AsciiBytesToString()
{
    var remaining = Iterations;
    while (remaining-- > 0)
    {
        HttpUtilities.GetAsciiStringNonNullCharacters(_asciiBytes);
    }
}
/// <summary>
/// Decodes the UTF-8 sample with the ASCII-or-UTF-8 helper,
/// <c>Iterations</c> times per invocation.
/// </summary>
[Benchmark(OperationsPerInvoke = Iterations)]
public void Utf8BytesToString()
{
    var remaining = Iterations;
    while (remaining-- > 0)
    {
        HttpUtilities.GetAsciiOrUTF8StringNonNullCharacters(_utf8Bytes);
    }
}
/// <summary>
/// Header samples the benchmark can run against; Setup() maps each value to its byte arrays.
/// </summary>
public enum BenchmarkTypes
{
// "keep-alive" and its accented UTF-8 variant
KeepAlive,
// An Accept header value listing several media types with q-values
Accept,
// A browser User-Agent string
UserAgent,
// A Cookie header value containing percent-encoded sequences
Cookie,
}
}
}

View File

@ -439,7 +439,7 @@ namespace Microsoft.AspNetCore.Server.Kestrel.Core.Internal.Http
{
BadHttpRequestException.Throw(RequestRejectionReason.TooManyHeaders);
}
var valueString = value.GetAsciiStringNonNullCharacters();
var valueString = value.GetAsciiOrUTF8StringNonNullCharacters();
HttpRequestHeaders.Append(name, valueString);
}

View File

@ -29,6 +29,8 @@ namespace Microsoft.AspNetCore.Server.Kestrel.Core.Internal.Infrastructure
private const ulong _http10VersionLong = 3471766442030158920; // GetAsciiStringAsLong("HTTP/1.0"); const results in better codegen
private const ulong _http11VersionLong = 3543824036068086856; // GetAsciiStringAsLong("HTTP/1.1"); const results in better codegen
private static readonly UTF8EncodingSealed HeaderValueEncoding = new UTF8EncodingSealed();
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void SetKnownMethod(ulong mask, ulong knownMethodUlong, HttpMethod knownMethod, int length)
{
@ -105,6 +107,41 @@ namespace Microsoft.AspNetCore.Server.Kestrel.Core.Internal.Infrastructure
return asciiString;
}
/// <summary>
/// Decodes <paramref name="span"/> as ASCII when possible and otherwise as strict UTF-8.
/// Null (0x00) bytes are never accepted.
/// </summary>
/// <param name="span">The raw bytes to decode.</param>
/// <returns>The decoded string, or <see cref="string.Empty"/> for an empty span.</returns>
/// <exception cref="InvalidOperationException">
/// The span contains a null byte, or is neither pure ASCII nor valid UTF-8. In the latter case
/// the decoder's <see cref="DecoderFallbackException"/> is attached as the inner exception.
/// </exception>
public static unsafe string GetAsciiOrUTF8StringNonNullCharacters(this Span<byte> span)
{
    if (span.IsEmpty)
    {
        return string.Empty;
    }

    // Pre-allocated for the ASCII fast path, which writes one char per input byte.
    var resultString = new string('\0', span.Length);

    fixed (char* output = resultString)
    fixed (byte* buffer = &MemoryMarshal.GetReference(span))
    {
        // Fast path. TryGetAsciiString presumably fails for any byte outside 1-127
        // (confirm against StringUtilities); a failure sends us to the slower checks below.
        if (!StringUtilities.TryGetAsciiString(buffer, output, span.Length))
        {
            // null characters are considered invalid
            if (span.IndexOf((byte)0) != -1)
            {
                throw new InvalidOperationException("The value contains a null (0x00) byte.");
            }

            try
            {
                // HeaderValueEncoding is configured to throw on invalid byte sequences.
                resultString = HeaderValueEncoding.GetString(buffer, span.Length);
            }
            catch (DecoderFallbackException ex)
            {
                // Keep the decoder failure as the inner exception so the offending bytes can be diagnosed.
                throw new InvalidOperationException("The value is neither ASCII nor valid UTF-8.", ex);
            }
        }
    }

    return resultString;
}
public static string GetAsciiStringEscaped(this Span<byte> span, int maxChars)
{
var sb = new StringBuilder();
@ -505,5 +542,13 @@ namespace Microsoft.AspNetCore.Server.Kestrel.Core.Internal.Infrastructure
// Check if less than 6 representing chars 'a' - 'f'
|| (uint)((ch | 32) - 'a') < 6u;
}
// Sealed subclass so that calls on it can be de-virtualized (see https://github.com/dotnet/coreclr/pull/9230).
// Configured without a UTF-8 identifier and to throw on invalid bytes instead of substituting them.
private sealed class UTF8EncodingSealed : UTF8Encoding
{
    public UTF8EncodingSealed()
        : base(encoderShouldEmitUTF8Identifier: false, throwOnInvalidBytes: true)
    {
    }

    // Always report an empty preamble.
    public override byte[] GetPreamble()
    {
        return Array.Empty<byte>();
    }
}
}
}

View File

@ -81,6 +81,41 @@ namespace Microsoft.AspNetCore.Server.Kestrel.Core.Tests
_pipelineFactory.Dispose();
}
// A header value containing a multi-byte UTF-8 sequence must be accepted and decoded as UTF-8.
[Fact]
public async Task TakeMessageHeadersSucceedsWhenHeaderValueContainsUTF8()
{
    const string headerName = "Header";

    // "François" in UTF-8: the 'ç' is the two-byte sequence 0xc3 0xa7.
    var headerValueBytes = new byte[] { 0x46, 0x72, 0x61, 0x6e, 0xc3, 0xa7, 0x6f, 0x69, 0x73 };
    var expectedValue = Encoding.UTF8.GetString(headerValueBytes);

    _http1Connection.Reset();

    var namePrefix = Encoding.UTF8.GetBytes($"{headerName}: ");
    var terminator = Encoding.UTF8.GetBytes("\r\n\r\n");
    await _application.Output.WriteAsync(namePrefix);
    await _application.Output.WriteAsync(headerValueBytes);
    await _application.Output.WriteAsync(terminator);

    var readResult = await _transport.Input.ReadAsync();
    var readableBuffer = readResult.Buffer;

    _http1Connection.TakeMessageHeaders(readableBuffer, out _consumed, out _examined);
    _transport.Input.AdvanceTo(_consumed, _examined);

    Assert.Equal(expectedValue, _http1Connection.RequestHeaders[headerName]);
}
// A header value containing a raw ISO-8859-1 byte that is not valid UTF-8 must be rejected.
[Fact]
public async Task TakeMessageHeadersThrowsWhenHeaderValueContainsExtendedASCII()
{
    var extendedAsciiEncoding = Encoding.GetEncoding("ISO-8859-1");
    var headerName = "Header";

    // "François" in ISO-8859-1: the 'ç' is the single byte 0xe7, which is not a valid UTF-8 sequence here.
    var headerValueBytes = new byte[] { 0x46, 0x72, 0x61, 0x6e, 0xe7, 0x6f, 0x69, 0x73 };

    _http1Connection.Reset();

    await _application.Output.WriteAsync(extendedAsciiEncoding.GetBytes($"{headerName}: "));
    await _application.Output.WriteAsync(headerValueBytes);
    await _application.Output.WriteAsync(extendedAsciiEncoding.GetBytes("\r\n\r\n"));

    var readableBuffer = (await _transport.Input.ReadAsync()).Buffer;

    // Only the exception type matters, so the returned exception is not captured.
    Assert.Throws<InvalidOperationException>(() => _http1Connection.TakeMessageHeaders(readableBuffer, out _consumed, out _examined));
}
[Fact]
public async Task TakeMessageHeadersThrowsWhenHeadersExceedTotalSizeLimit()
{

View File

@ -0,0 +1,43 @@
// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
using System;
using System.Linq;
using System.Numerics;
using Microsoft.AspNetCore.Server.Kestrel.Core.Internal.Infrastructure;
using Xunit;
namespace Microsoft.AspNetCore.Server.Kestrel.Core.Tests
{
/// <summary>
/// Tests for the ASCII-or-UTF-8 decoding of raw bytes done by
/// <c>GetAsciiOrUTF8StringNonNullCharacters</c>.
/// </summary>
public class UTF8DecodingTests
{
    // Each input encodes exactly one character that fits in a single UTF-16 char.
    [Theory]
    [InlineData(new byte[] { 0x01 })] // 1 byte: Control character, lowest UTF-8 character we will allow to be decoded since 0x00 is rejected,
    [InlineData(new byte[] { 0xc2, 0xa0})] // 2 bytes: Non-breaking space, lowest valid UTF-8 that is not a valid ASCII character
    [InlineData(new byte[] { 0xef, 0xbf, 0xbd })] // 3 bytes: Replacement character, highest UTF-8 character currently encoded in the UTF-8 code page
    public void FullUTF8RangeSupported(byte[] encodedBytes)
    {
        var s = encodedBytes.AsSpan().GetAsciiOrUTF8StringNonNullCharacters();

        Assert.Equal(1, s.Length);
    }

    // Places each invalid sequence at every position of buffers of many lengths, so that it is
    // seen by every step of the decoder (vector-sized chunks as well as the shorter tail steps).
    [Theory]
    [InlineData(new byte[] { 0x00 })] // We reject the null character
    [InlineData(new byte[] { 0x80 })] // First valid Extended ASCII that is not a valid UTF-8 Encoding (a lone continuation byte)
    [InlineData(new byte[] { 0x20, 0xac })] // ASCII space followed by a lone continuation byte (0xac), which is not valid UTF-8
    public void ExceptionThrownForZeroOrNonAscii(byte[] bytes)
    {
        for (var length = bytes.Length; length < Vector<sbyte>.Count * 4 + bytes.Length; length++)
        {
            for (var position = 0; position <= length - bytes.Length; position++)
            {
                // The filler must be valid, non-null ASCII (1-127) so that the injected bytes are the only
                // reason for the failure. A plain (byte)x would reach 0x80 and above once length >= 128,
                // which happens when Vector<sbyte>.Count is 32 or more.
                var byteRange = Enumerable.Range(1, length).Select(x => (byte)((x - 1) % 127 + 1)).ToArray();
                Array.Copy(bytes, 0, byteRange, position, bytes.Length);

                Assert.Throws<InvalidOperationException>(() => byteRange.AsSpan().GetAsciiOrUTF8StringNonNullCharacters());
            }
        }
    }
}
}

View File

@ -304,7 +304,7 @@ namespace Microsoft.AspNetCore.Server.Kestrel.Core.Tests
void IHttpHeadersHandler.OnHeader(Span<byte> name, Span<byte> value)
{
_decodedHeaders[name.GetAsciiStringNonNullCharacters()] = value.GetAsciiStringNonNullCharacters();
_decodedHeaders[name.GetAsciiStringNonNullCharacters()] = value.GetAsciiOrUTF8StringNonNullCharacters();
}
protected async Task InitializeConnectionAsync(RequestDelegate application, int expectedSettingsLegnth = 6)