diff --git a/src/Microsoft.Net.Http.Server/RequestProcessing/UrlInByte.cs b/src/Microsoft.Net.Http.Server/RequestProcessing/RawUrlHelper.cs similarity index 66% rename from src/Microsoft.Net.Http.Server/RequestProcessing/UrlInByte.cs rename to src/Microsoft.Net.Http.Server/RequestProcessing/RawUrlHelper.cs index ed6ac62126..0d7d194851 100644 --- a/src/Microsoft.Net.Http.Server/RequestProcessing/UrlInByte.cs +++ b/src/Microsoft.Net.Http.Server/RequestProcessing/RawUrlHelper.cs @@ -1,27 +1,36 @@ -using System; -using System.Text; +// Copyright (c) Microsoft Open Technologies, Inc. +// All Rights Reserved +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING +// WITHOUT LIMITATION ANY IMPLIED WARRANTIES OR CONDITIONS OF +// TITLE, FITNESS FOR A PARTICULAR PURPOSE, MERCHANTABLITY OR +// NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing +// permissions and limitations under the License. + +// ----------------------------------------------------------------------- +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// +// ----------------------------------------------------------------------- + +using System; namespace Microsoft.Net.Http.Server { - internal class UrlInByte + internal static class RawUrlHelper { - private static string HTTP_SCHEME = "http://"; - private static string HTTPS_SCHEME = "https://"; - - private readonly byte[] _raw; - - public UrlInByte(byte[] raw) - { - _raw = raw; - Path = LocalPath(_raw); - } - - public ArraySegment Path { get; } - /// /// Find the segment of the URI byte array which represents the path. /// - private static ArraySegment LocalPath(byte[] raw) + public static ArraySegment GetPath(byte[] raw) { // performance var pathStartIndex = 0; @@ -84,27 +93,52 @@ namespace Microsoft.Net.Http.Server /// Length of the matched bytes, 0 if it is not matched. private static int FindHttpOrHttps(byte[] raw) { - if (raw.Length < 7) + if (raw[0] != 'h' && raw[0] != 'H') { return 0; } - if (string.Equals(HTTP_SCHEME, Encoding.UTF8.GetString(raw, 0, 7), StringComparison.OrdinalIgnoreCase)) - { - return 7; - } - - if (raw.Length < 8) + if (raw[1] != 't' && raw[1] != 'T') { return 0; } - if (string.Equals(HTTPS_SCHEME, Encoding.UTF8.GetString(raw, 0, 8), StringComparison.OrdinalIgnoreCase)) + if (raw[2] != 't' && raw[2] != 'T') { - return 8; + return 0; } - return 0; + if (raw[3] != 'p' && raw[3] != 'P') + { + return 0; + } + + if (raw[4] == ':') + { + if (raw[5] != '/' || raw[6] != '/') + { + return 0; + } + else + { + return 7; + } + } + else if (raw[4] == 's' || raw[4] == 'S') + { + if (raw[5] != ':' || raw[6] != '/' || raw[7] != '/') + { + return 0; + } + else + { + return 8; + } + } + else + { + return 0; + } } private static int Find(byte[] raw, int begin, char target) diff --git a/src/Microsoft.Net.Http.Server/RequestProcessing/Request.cs b/src/Microsoft.Net.Http.Server/RequestProcessing/Request.cs index 3063d777fc..c244a44753 100644 --- a/src/Microsoft.Net.Http.Server/RequestProcessing/Request.cs +++ b/src/Microsoft.Net.Http.Server/RequestProcessing/Request.cs @@ -72,7 +72,7 @@ namespace Microsoft.Net.Http.Server var prefix = requestContext.Server.Settings.UrlPrefixes.GetPrefix((int)nativeRequestContext.UrlContext); var rawUrlInBytes = _nativeRequestContext.GetRawUrlInBytes(); - var originalPath = RequestUriBuilder.GetRequestPath(rawUrlInBytes, RequestContext.Logger); + var originalPath = RequestUriBuilder.DecodeAndUnescapePath(rawUrlInBytes); // 'OPTIONS * HTTP/1.1' if (KnownMethod == HttpApi.HTTP_VERB.HttpVerbOPTIONS && string.Equals(RawUrl, "*", StringComparison.Ordinal)) @@ -179,7 +179,7 @@ namespace Microsoft.Net.Http.Server public string Path { get; } public bool IsHttps => SslStatus != SslStatus.Insecure; - + public string RawUrl { get; } public Version ProtocolVersion { get; } diff --git a/src/Microsoft.Net.Http.Server/RequestProcessing/RequestUriBuilder.cs b/src/Microsoft.Net.Http.Server/RequestProcessing/RequestUriBuilder.cs index 72f76068d1..ce148f0cae 100644 --- a/src/Microsoft.Net.Http.Server/RequestProcessing/RequestUriBuilder.cs +++ b/src/Microsoft.Net.Http.Server/RequestProcessing/RequestUriBuilder.cs @@ -1,4 +1,4 @@ -// Copyright (c) Microsoft Open Technologies, Inc. +// Copyright (c) Microsoft Open Technologies, Inc. // All Rights Reserved // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -21,9 +21,8 @@ // // ----------------------------------------------------------------------- -using System.Diagnostics; +using System; using System.Text; -using Microsoft.Extensions.Logging; namespace Microsoft.Net.Http.Server { @@ -31,32 +30,333 @@ namespace Microsoft.Net.Http.Server // we also can't just use the raw Uri, since http.sys supports not only UTF-8, but also ANSI/DBCS and // Unicode code points. System.Uri only supports UTF-8. // The purpose of this class is to decode all UTF-8 percent encoded characters, with the - // exception of %2F ('/'), which is left encoded. - internal sealed class RequestUriBuilder + // exception of %2F ('/'), which is left encoded + internal static class RequestUriBuilder { - private static readonly Encoding Utf8Encoding; + private static readonly Encoding UTF8 = new UTF8Encoding( + encoderShouldEmitUTF8Identifier: false, + throwOnInvalidBytes: true); - static RequestUriBuilder() + public static string DecodeAndUnescapePath(byte[] urlInBytes) { - Utf8Encoding = new UTF8Encoding(false, true); - } - - // Process only the path. - public static string GetRequestPath(byte[] rawUriInBytes, ILogger logger) - { - //Debug.Assert(rawUriInBytes == null || rawUriInBytes.Length == 0, "Empty raw URL."); - //Debug.Assert(logger != null, "Null logger."); - - var rawUriInByte = new UrlInByte(rawUriInBytes); - var pathInByte = rawUriInByte.Path; - - if (pathInByte.Count == 1 && pathInByte.Array[pathInByte.Offset] == '*') + if (urlInBytes == null) { - return "/*"; + throw new ArgumentNullException(nameof(urlInBytes)); } - var unescapedRaw = UrlPathDecoder.Unescape(pathInByte); - return Utf8Encoding.GetString(unescapedRaw.Array, unescapedRaw.Offset, unescapedRaw.Count); + if (urlInBytes.Length == 0) + { + throw new ArgumentException("Length of the URL cannot be zero.", nameof(urlInBytes)); + } + + var rawPath = RawUrlHelper.GetPath(urlInBytes); + + var unescapedPath = Unescape(rawPath); + + return UTF8.GetString(unescapedPath.Array, unescapedPath.Offset, unescapedPath.Count); + } + + /// + /// Unescape a given path string which may contain escaped char. + /// + /// The raw path string to be unescaped + /// The unescaped path string + private static ArraySegment Unescape(ArraySegment rawPath) + { + // the slot to read the input + var reader = rawPath.Offset; + + // the slot to write the unescaped byte + var writer = rawPath.Offset; + + // the end of the path + var end = rawPath.Offset + rawPath.Count; + + // the byte array + var buffer = rawPath.Array; + + while (true) + { + if (reader == end) + { + break; + } + + if (rawPath.Array[reader] == '%') + { + var decodeReader = reader; + + // If decoding process succeeds, the writer iterator will be moved + // to the next write-ready location. On the other hand if the scanned + // percent-encodings cannot be interpreted as sequence of UTF-8 octets, + // these bytes should be copied to output as is. + // The decodeReader iterator is always moved to the first byte not yet + // be scanned after the process. A failed decoding means the chars + // between the reader and decodeReader can be copied to output untouched. + if (!DecodeCore(ref decodeReader, ref writer, end, buffer)) + { + Copy(reader, decodeReader, ref writer, buffer); + } + + reader = decodeReader; + } + else + { + buffer[writer++] = buffer[reader++]; + } + } + + return new ArraySegment(buffer, rawPath.Offset, writer - rawPath.Offset); + } + + /// + /// Unescape the percent-encodings + /// + /// The iterator point to the first % char + /// The place to write to + /// The byte array + private static bool DecodeCore(ref int reader, ref int writer, int end, byte[] buffer) + { + // preserves the original head. if the percent-encodings cannot be interpreted as sequence of UTF-8 octets, + // bytes from this till the last scanned one will be copied to the memory pointed by writer. + var byte1 = UnescapePercentEncoding(ref reader, end, buffer); + + if (byte1 == 0) + { + throw new InvalidOperationException("The path contains null characters."); + } + + if (byte1 == -1) + { + return false; + } + + if (byte1 <= 0x7F) + { + // first byte < U+007f, it is a single byte ASCII + buffer[writer++] = (byte)byte1; + return true; + } + + int byte2 = 0, byte3 = 0, byte4 = 0; + + // anticipate more bytes + var currentDecodeBits = 0; + var byteCount = 1; + var expectValueMin = 0; + if ((byte1 & 0xE0) == 0xC0) + { + // 110x xxxx, expect one more byte + currentDecodeBits = byte1 & 0x1F; + byteCount = 2; + expectValueMin = 0x80; + } + else if ((byte1 & 0xF0) == 0xE0) + { + // 1110 xxxx, expect two more bytes + currentDecodeBits = byte1 & 0x0F; + byteCount = 3; + expectValueMin = 0x800; + } + else if ((byte1 & 0xF8) == 0xF0) + { + // 1111 0xxx, expect three more bytes + currentDecodeBits = byte1 & 0x07; + byteCount = 4; + expectValueMin = 0x10000; + } + else + { + // invalid first byte + return false; + } + + var remainingBytes = byteCount - 1; + while (remainingBytes > 0) + { + // read following three chars + if (reader == buffer.Length) + { + return false; + } + + var nextItr = reader; + var nextByte = UnescapePercentEncoding(ref nextItr, end, buffer); + if (nextByte == -1) + { + return false; + } + + if ((nextByte & 0xC0) != 0x80) + { + // the follow up byte is not in form of 10xx xxxx + return false; + } + + currentDecodeBits = (currentDecodeBits << 6) | (nextByte & 0x3F); + remainingBytes--; + + if (remainingBytes == 1 && currentDecodeBits >= 0x360 && currentDecodeBits <= 0x37F) + { + // this is going to end up in the range of 0xD800-0xDFFF UTF-16 surrogates that + // are not allowed in UTF-8; + return false; + } + + if (remainingBytes == 2 && currentDecodeBits >= 0x110) + { + // this is going to be out of the upper Unicode bound 0x10FFFF. + return false; + } + + reader = nextItr; + if (byteCount - remainingBytes == 2) + { + byte2 = nextByte; + } + else if (byteCount - remainingBytes == 3) + { + byte3 = nextByte; + } + else if (byteCount - remainingBytes == 4) + { + byte4 = nextByte; + } + } + + if (currentDecodeBits < expectValueMin) + { + // overlong encoding (e.g. using 2 bytes to encode something that only needed 1). + return false; + } + + // all bytes are verified, write to the output + if (byteCount > 0) + { + buffer[writer++] = (byte)byte1; + } + if (byteCount > 1) + { + buffer[writer++] = (byte)byte2; + } + if (byteCount > 2) + { + buffer[writer++] = (byte)byte3; + } + if (byteCount > 3) + { + buffer[writer++] = (byte)byte4; + } + + return true; + } + + private static void Copy(int begin, int end, ref int writer, byte[] buffer) + { + while (begin != end) + { + buffer[writer++] = buffer[begin++]; + } + } + + /// + /// Read the percent-encoding and try unescape it. + /// + /// The operation first peek at the character the + /// iterator points at. If it is % the is then + /// moved on to scan the following to characters. If the two following + /// characters are hexadecimal literals they will be unescaped and the + /// value will be returned. + /// + /// If the first character is not % the iterator + /// will be removed beyond the location of % and -1 will be returned. + /// + /// If the following two characters can't be successfully unescaped the + /// iterator will be move behind the % and -1 + /// will be returned. + /// + /// The value to read + /// The byte array + /// The unescaped byte if success. Otherwise return -1. + private static int UnescapePercentEncoding(ref int scan, int end, byte[] buffer) + { + if (buffer[scan++] != '%') + { + return -1; + } + + var probe = scan; + + int value1 = ReadHex(ref probe, end, buffer); + if (value1 == -1) + { + return -1; + } + + int value2 = ReadHex(ref probe, end, buffer); + if (value2 == -1) + { + return -1; + } + + if (SkipUnescape(value1, value2)) + { + return -1; + } + + scan = probe; + return (value1 << 4) + value2; + } + + /// + /// Read the next char and convert it into hexadecimal value. + /// + /// The iterator will be moved to the next + /// byte no matter no matter whether the operation successes. + /// + /// The value to read + /// The byte array + /// The hexadecimal value if successes, otherwise -1. + private static int ReadHex(ref int scan, int end, byte[] buffer) + { + if (scan == end) + { + return -1; + } + + var value = buffer[scan++]; + var isHead = (((value >= '0') && (value <= '9')) || + ((value >= 'A') && (value <= 'F')) || + ((value >= 'a') && (value <= 'f'))); + + if (!isHead) + { + return -1; + } + + if (value <= '9') + { + return value - '0'; + } + else if (value <= 'F') + { + return (value - 'A') + 10; + } + else // a - f + { + return (value - 'a') + 10; + } + } + + private static bool SkipUnescape(int value1, int value2) + { + // skip %2F + if (value1 == 2 && value2 == 15) + { + return true; + } + + return false; } } } diff --git a/src/Microsoft.Net.Http.Server/RequestProcessing/UrlPathDecoder.cs b/src/Microsoft.Net.Http.Server/RequestProcessing/UrlPathDecoder.cs deleted file mode 100644 index 7086029810..0000000000 --- a/src/Microsoft.Net.Http.Server/RequestProcessing/UrlPathDecoder.cs +++ /dev/null @@ -1,313 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using System; - -namespace Microsoft.Net.Http.Server -{ - public class UrlPathDecoder - { - /// - /// Unescape a given path string which may contain escaped char. - /// - /// The raw path string to be unescaped - /// The unescaped path string - public static ArraySegment Unescape(ArraySegment rawPath) - { - // the slot to read the input - var reader = rawPath.Offset; - - // the slot to write the unescaped byte - var writer = rawPath.Offset; - - // the end of the path - var end = rawPath.Offset + rawPath.Count; - - // the byte array - var buffer = rawPath.Array; - - while (true) - { - if (reader == end) - { - break; - } - - if (rawPath.Array[reader] == '%') - { - var decodeReader = reader; - - // If decoding process succeeds, the writer iterator will be moved - // to the next write-ready location. On the other hand if the scanned - // percent-encodings cannot be interpreted as sequence of UTF-8 octets, - // these bytes should be copied to output as is. - // The decodeReader iterator is always moved to the first byte not yet - // be scanned after the process. A failed decoding means the chars - // between the reader and decodeReader can be copied to output untouched. - if (!DecodeCore(ref decodeReader, ref writer, end, buffer)) - { - Copy(reader, decodeReader, ref writer, buffer); - } - - reader = decodeReader; - } - else - { - buffer[writer++] = buffer[reader++]; - } - } - - return new ArraySegment(buffer, rawPath.Offset, writer - rawPath.Offset); - } - - /// - /// Unescape the percent-encodings - /// - /// The iterator point to the first % char - /// The place to write to - /// The byte array - private static bool DecodeCore(ref int reader, ref int writer, int end, byte[] buffer) - { - // preserves the original head. if the percent-encodings cannot be interpreted as sequence of UTF-8 octets, - // bytes from this till the last scanned one will be copied to the memory pointed by writer. - var byte1 = UnescapePercentEncoding(ref reader, end, buffer); - - if (byte1 == 0) - { - throw new InvalidOperationException("The path contains null characters."); - } - - if (byte1 == -1) - { - return false; - } - - if (byte1 <= 0x7F) - { - // first byte < U+007f, it is a single byte ASCII - buffer[writer++] = (byte)byte1; - return true; - } - - int byte2 = 0, byte3 = 0, byte4 = 0; - - // anticipate more bytes - var currentDecodeBits = 0; - var byteCount = 1; - var expectValueMin = 0; - if ((byte1 & 0xE0) == 0xC0) - { - // 110x xxxx, expect one more byte - currentDecodeBits = byte1 & 0x1F; - byteCount = 2; - expectValueMin = 0x80; - } - else if ((byte1 & 0xF0) == 0xE0) - { - // 1110 xxxx, expect two more bytes - currentDecodeBits = byte1 & 0x0F; - byteCount = 3; - expectValueMin = 0x800; - } - else if ((byte1 & 0xF8) == 0xF0) - { - // 1111 0xxx, expect three more bytes - currentDecodeBits = byte1 & 0x07; - byteCount = 4; - expectValueMin = 0x10000; - } - else - { - // invalid first byte - return false; - } - - var remainingBytes = byteCount - 1; - while (remainingBytes > 0) - { - // read following three chars - if (reader == buffer.Length) - { - return false; - } - - var nextItr = reader; - var nextByte = UnescapePercentEncoding(ref nextItr, end, buffer); - if (nextByte == -1) - { - return false; - } - - if ((nextByte & 0xC0) != 0x80) - { - // the follow up byte is not in form of 10xx xxxx - return false; - } - - currentDecodeBits = (currentDecodeBits << 6) | (nextByte & 0x3F); - remainingBytes--; - - if (remainingBytes == 1 && currentDecodeBits >= 0x360 && currentDecodeBits <= 0x37F) - { - // this is going to end up in the range of 0xD800-0xDFFF UTF-16 surrogates that - // are not allowed in UTF-8; - return false; - } - - if (remainingBytes == 2 && currentDecodeBits >= 0x110) - { - // this is going to be out of the upper Unicode bound 0x10FFFF. - return false; - } - - reader = nextItr; - if (byteCount - remainingBytes == 2) - { - byte2 = nextByte; - } - else if (byteCount - remainingBytes == 3) - { - byte3 = nextByte; - } - else if (byteCount - remainingBytes == 4) - { - byte4 = nextByte; - } - } - - if (currentDecodeBits < expectValueMin) - { - // overlong encoding (e.g. using 2 bytes to encode something that only needed 1). - return false; - } - - // all bytes are verified, write to the output - if (byteCount > 0) - { - buffer[writer++] = (byte)byte1; - } - if (byteCount > 1) - { - buffer[writer++] = (byte)byte2; - } - if (byteCount > 2) - { - buffer[writer++] = (byte)byte3; - } - if (byteCount > 3) - { - buffer[writer++] = (byte)byte4; - } - - return true; - } - - private static void Copy(int begin, int end, ref int writer, byte[] buffer) - { - while (begin != end) - { - buffer[writer++] = buffer[begin++]; - } - } - - /// - /// Read the percent-encoding and try unescape it. - /// - /// The operation first peek at the character the - /// iterator points at. If it is % the is then - /// moved on to scan the following to characters. If the two following - /// characters are hexadecimal literals they will be unescaped and the - /// value will be returned. - /// - /// If the first character is not % the iterator - /// will be removed beyond the location of % and -1 will be returned. - /// - /// If the following two characters can't be successfully unescaped the - /// iterator will be move behind the % and -1 - /// will be returned. - /// - /// The value to read - /// The byte array - /// The unescaped byte if success. Otherwise return -1. - private static int UnescapePercentEncoding(ref int scan, int end, byte[] buffer) - { - if (buffer[scan++] != '%') - { - return -1; - } - - var probe = scan; - - int value1 = ReadHex(ref probe, end, buffer); - if (value1 == -1) - { - return -1; - } - - int value2 = ReadHex(ref probe, end, buffer); - if (value2 == -1) - { - return -1; - } - - if (SkipUnescape(value1, value2)) - { - return -1; - } - - scan = probe; - return (value1 << 4) + value2; - } - - /// - /// Read the next char and convert it into hexadecimal value. - /// - /// The iterator will be moved to the next - /// byte no matter no matter whether the operation successes. - /// - /// The value to read - /// The byte array - /// The hexadecimal value if successes, otherwise -1. - private static int ReadHex(ref int scan, int end, byte[] buffer) - { - if (scan == end) - { - return -1; - } - - var value = buffer[scan++]; - var isHead = (((value >= '0') && (value <= '9')) || - ((value >= 'A') && (value <= 'F')) || - ((value >= 'a') && (value <= 'f'))); - - if (!isHead) - { - return -1; - } - - if (value <= '9') - { - return value - '0'; - } - else if (value <= 'F') - { - return (value - 'A') + 10; - } - else // a - f - { - return (value - 'a') + 10; - } - } - - private static bool SkipUnescape(int value1, int value2) - { - // skip %2F - if (value1 == 2 && value2 == 15) - { - return true; - } - - return false; - } - } -}