diff --git a/src/Microsoft.Net.Http.Server/RequestProcessing/UrlInByte.cs b/src/Microsoft.Net.Http.Server/RequestProcessing/RawUrlHelper.cs
similarity index 66%
rename from src/Microsoft.Net.Http.Server/RequestProcessing/UrlInByte.cs
rename to src/Microsoft.Net.Http.Server/RequestProcessing/RawUrlHelper.cs
index ed6ac62126..0d7d194851 100644
--- a/src/Microsoft.Net.Http.Server/RequestProcessing/UrlInByte.cs
+++ b/src/Microsoft.Net.Http.Server/RequestProcessing/RawUrlHelper.cs
@@ -1,27 +1,36 @@
-using System;
-using System.Text;
+// Copyright (c) Microsoft Open Technologies, Inc.
+// All Rights Reserved
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING
+// WITHOUT LIMITATION ANY IMPLIED WARRANTIES OR CONDITIONS OF
+// TITLE, FITNESS FOR A PARTICULAR PURPOSE, MERCHANTABLITY OR
+// NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing
+// permissions and limitations under the License.
+
+// -----------------------------------------------------------------------
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+//
+// -----------------------------------------------------------------------
+
+using System;
namespace Microsoft.Net.Http.Server
{
- internal class UrlInByte
+ internal static class RawUrlHelper
{
- private static string HTTP_SCHEME = "http://";
- private static string HTTPS_SCHEME = "https://";
-
- private readonly byte[] _raw;
-
- public UrlInByte(byte[] raw)
- {
- _raw = raw;
- Path = LocalPath(_raw);
- }
-
- public ArraySegment Path { get; }
-
///
/// Find the segment of the URI byte array which represents the path.
///
- private static ArraySegment LocalPath(byte[] raw)
+ public static ArraySegment GetPath(byte[] raw)
{
// performance
var pathStartIndex = 0;
@@ -84,27 +93,52 @@ namespace Microsoft.Net.Http.Server
/// Length of the matched bytes, 0 if it is not matched.
private static int FindHttpOrHttps(byte[] raw)
{
- if (raw.Length < 7)
+ if (raw.Length < 7 || (raw[0] != 'h' && raw[0] != 'H')) // "http://" is 7 bytes; the length guard keeps raw[1]..raw[6] in range
{
return 0;
}
- if (string.Equals(HTTP_SCHEME, Encoding.UTF8.GetString(raw, 0, 7), StringComparison.OrdinalIgnoreCase))
- {
- return 7;
- }
-
- if (raw.Length < 8)
+ if (raw[1] != 't' && raw[1] != 'T')
{
return 0;
}
- if (string.Equals(HTTPS_SCHEME, Encoding.UTF8.GetString(raw, 0, 8), StringComparison.OrdinalIgnoreCase))
+ if (raw[2] != 't' && raw[2] != 'T')
{
- return 8;
+ return 0;
}
- return 0;
+ if (raw[3] != 'p' && raw[3] != 'P')
+ {
+ return 0;
+ }
+ // "http" matched in either case; what follows must be "://" or "s://" ('s' in either case).
+ if (raw[4] == ':')
+ {
+ if (raw[5] != '/' || raw[6] != '/')
+ {
+ return 0;
+ }
+ else
+ {
+ return 7; // length of "http://"
+ }
+ }
+ else if (raw[4] == 's' || raw[4] == 'S')
+ {
+ if (raw.Length < 8 || raw[5] != ':' || raw[6] != '/' || raw[7] != '/') // "https://" is 8 bytes; guard raw[7]
+ {
+ return 0;
+ }
+ else
+ {
+ return 8; // length of "https://"
+ }
+ }
+ else
+ {
+ return 0;
+ }
}
private static int Find(byte[] raw, int begin, char target)
diff --git a/src/Microsoft.Net.Http.Server/RequestProcessing/Request.cs b/src/Microsoft.Net.Http.Server/RequestProcessing/Request.cs
index 3063d777fc..c244a44753 100644
--- a/src/Microsoft.Net.Http.Server/RequestProcessing/Request.cs
+++ b/src/Microsoft.Net.Http.Server/RequestProcessing/Request.cs
@@ -72,7 +72,7 @@ namespace Microsoft.Net.Http.Server
var prefix = requestContext.Server.Settings.UrlPrefixes.GetPrefix((int)nativeRequestContext.UrlContext);
var rawUrlInBytes = _nativeRequestContext.GetRawUrlInBytes();
- var originalPath = RequestUriBuilder.GetRequestPath(rawUrlInBytes, RequestContext.Logger);
+ var originalPath = RequestUriBuilder.DecodeAndUnescapePath(rawUrlInBytes);
// 'OPTIONS * HTTP/1.1'
if (KnownMethod == HttpApi.HTTP_VERB.HttpVerbOPTIONS && string.Equals(RawUrl, "*", StringComparison.Ordinal))
@@ -179,7 +179,7 @@ namespace Microsoft.Net.Http.Server
public string Path { get; }
public bool IsHttps => SslStatus != SslStatus.Insecure;
-
+
public string RawUrl { get; }
public Version ProtocolVersion { get; }
diff --git a/src/Microsoft.Net.Http.Server/RequestProcessing/RequestUriBuilder.cs b/src/Microsoft.Net.Http.Server/RequestProcessing/RequestUriBuilder.cs
index 72f76068d1..ce148f0cae 100644
--- a/src/Microsoft.Net.Http.Server/RequestProcessing/RequestUriBuilder.cs
+++ b/src/Microsoft.Net.Http.Server/RequestProcessing/RequestUriBuilder.cs
@@ -1,4 +1,4 @@
-// Copyright (c) Microsoft Open Technologies, Inc.
+// Copyright (c) Microsoft Open Technologies, Inc.
// All Rights Reserved
//
// Licensed under the Apache License, Version 2.0 (the "License");
@@ -21,9 +21,8 @@
//
// -----------------------------------------------------------------------
-using System.Diagnostics;
+using System;
using System.Text;
-using Microsoft.Extensions.Logging;
namespace Microsoft.Net.Http.Server
{
@@ -31,32 +30,333 @@ namespace Microsoft.Net.Http.Server
// we also can't just use the raw Uri, since http.sys supports not only UTF-8, but also ANSI/DBCS and
// Unicode code points. System.Uri only supports UTF-8.
// The purpose of this class is to decode all UTF-8 percent encoded characters, with the
- // exception of %2F ('/'), which is left encoded.
- internal sealed class RequestUriBuilder
+ // exception of %2F ('/'), which is left encoded
+ internal static class RequestUriBuilder
{
- private static readonly Encoding Utf8Encoding;
+ private static readonly Encoding UTF8 = new UTF8Encoding(
+ encoderShouldEmitUTF8Identifier: false,
+ throwOnInvalidBytes: true);
- static RequestUriBuilder()
+ public static string DecodeAndUnescapePath(byte[] urlInBytes) // raw URL bytes -> path string: find the path, percent-decode it, then UTF-8 decode it
{
- Utf8Encoding = new UTF8Encoding(false, true);
- }
-
- // Process only the path.
- public static string GetRequestPath(byte[] rawUriInBytes, ILogger logger)
- {
- //Debug.Assert(rawUriInBytes == null || rawUriInBytes.Length == 0, "Empty raw URL.");
- //Debug.Assert(logger != null, "Null logger.");
-
- var rawUriInByte = new UrlInByte(rawUriInBytes);
- var pathInByte = rawUriInByte.Path;
-
- if (pathInByte.Count == 1 && pathInByte.Array[pathInByte.Offset] == '*')
+ if (urlInBytes == null)
{
- return "/*";
+ throw new ArgumentNullException(nameof(urlInBytes));
}
- var unescapedRaw = UrlPathDecoder.Unescape(pathInByte);
- return Utf8Encoding.GetString(unescapedRaw.Array, unescapedRaw.Offset, unescapedRaw.Count);
+ if (urlInBytes.Length == 0)
+ {
+ throw new ArgumentException("Length of the URL cannot be zero.", nameof(urlInBytes));
+ }
+ // Find the segment of the raw URL that represents the path.
+ var rawPath = RawUrlHelper.GetPath(urlInBytes);
+ // Percent-decode that segment; Unescape writes its result back into the segment's own array.
+ var unescapedPath = Unescape(rawPath);
+ // UTF8 is constructed with throwOnInvalidBytes: true, so invalid byte sequences throw rather than being replaced.
+ return UTF8.GetString(unescapedPath.Array, unescapedPath.Offset, unescapedPath.Count);
+ }
+
+ ///
+ /// Percent-decode a path segment in place; escapes that cannot be decoded are kept exactly as they were.
+ ///
+ /// rawPath: the raw path bytes to unescape. The array behind the segment is overwritten.
+ /// Returns a segment over the same array and the same offset, covering only the unescaped bytes.
+ private static ArraySegment Unescape(ArraySegment rawPath)
+ {
+ // index of the next input byte to read
+ var reader = rawPath.Offset;
+
+ // index of the next output byte to write (same array as the input)
+ var writer = rawPath.Offset;
+
+ // exclusive end of the path within the array
+ var end = rawPath.Offset + rawPath.Count;
+
+ // the byte array that is both read from and written to
+ var buffer = rawPath.Array;
+
+ while (true)
+ {
+ if (reader == end)
+ {
+ break;
+ }
+
+ if (rawPath.Array[reader] == '%')
+ {
+ var decodeReader = reader;
+
+ // If decoding succeeds, DecodeCore has written the decoded bytes and moved
+ // writer to the next free output slot. If the scanned percent-encodings
+ // cannot be interpreted as a sequence of UTF-8 octets, DecodeCore returns
+ // false and the scanned bytes must be copied to the output as they are.
+ // Either way decodeReader ends up on the first byte that has not been
+ // scanned yet, so on failure the bytes in [reader, decodeReader) are the
+ // ones to copy through untouched.
+ if (!DecodeCore(ref decodeReader, ref writer, end, buffer))
+ {
+ Copy(reader, decodeReader, ref writer, buffer);
+ }
+
+ reader = decodeReader;
+ }
+ else
+ {
+ buffer[writer++] = buffer[reader++];
+ }
+ }
+
+ return new ArraySegment(buffer, rawPath.Offset, writer - rawPath.Offset);
+ }
+
+ ///
+ /// Decode one percent-encoded UTF-8 character (one to four %XX escapes) starting at reader and write its raw bytes at writer.
+ /// Returns true when the character was decoded and written; false when the caller should copy the scanned bytes unchanged.
+ /// reader: index of the leading '%'. On return it is the first byte this call did not consume.
+ /// writer: index to write decoded bytes to; advanced only on success. end: exclusive end of the path inside buffer.
+ /// buffer: the byte array that is both read from and written to. Throws InvalidOperationException when the first escape is %00.
+ private static bool DecodeCore(ref int reader, ref int writer, int end, byte[] buffer)
+ {
+ // Decode the lead byte. If the escapes turn out not to form a valid UTF-8 sequence, this method returns
+ // false and the caller copies everything scanned so far (up to reader) to the output unchanged.
+ var byte1 = UnescapePercentEncoding(ref reader, end, buffer);
+
+ if (byte1 == 0)
+ {
+ throw new InvalidOperationException("The path contains null characters.");
+ }
+
+ if (byte1 == -1)
+ {
+ return false;
+ }
+
+ if (byte1 <= 0x7F)
+ {
+ // first byte <= U+007F, it is a single byte ASCII character
+ buffer[writer++] = (byte)byte1;
+ return true;
+ }
+
+ int byte2 = 0, byte3 = 0, byte4 = 0;
+
+ // multi-byte lead: work out how many continuation bytes to expect and the smallest legal code point
+ var currentDecodeBits = 0;
+ var byteCount = 1;
+ var expectValueMin = 0;
+ if ((byte1 & 0xE0) == 0xC0)
+ {
+ // 110x xxxx, expect one more byte
+ currentDecodeBits = byte1 & 0x1F;
+ byteCount = 2;
+ expectValueMin = 0x80;
+ }
+ else if ((byte1 & 0xF0) == 0xE0)
+ {
+ // 1110 xxxx, expect two more bytes
+ currentDecodeBits = byte1 & 0x0F;
+ byteCount = 3;
+ expectValueMin = 0x800;
+ }
+ else if ((byte1 & 0xF8) == 0xF0)
+ {
+ // 1111 0xxx, expect three more bytes
+ currentDecodeBits = byte1 & 0x07;
+ byteCount = 4;
+ expectValueMin = 0x10000;
+ }
+ else
+ {
+ // invalid first byte (a bare continuation byte, or 1111 1xxx)
+ return false;
+ }
+
+ var remainingBytes = byteCount - 1;
+ while (remainingBytes > 0)
+ {
+ // the next continuation escape must start inside the path, i.e. before end (not merely inside the array)
+ if (reader == end)
+ {
+ return false;
+ }
+
+ var nextItr = reader;
+ var nextByte = UnescapePercentEncoding(ref nextItr, end, buffer);
+ if (nextByte == -1)
+ {
+ return false;
+ }
+
+ if ((nextByte & 0xC0) != 0x80)
+ {
+ // the follow up byte is not in form of 10xx xxxx
+ return false;
+ }
+
+ currentDecodeBits = (currentDecodeBits << 6) | (nextByte & 0x3F);
+ remainingBytes--;
+
+ if (remainingBytes == 1 && currentDecodeBits >= 0x360 && currentDecodeBits <= 0x37F)
+ {
+ // this is going to end up in the range of 0xD800-0xDFFF UTF-16 surrogates that
+ // are not allowed in UTF-8 (0x360-0x37F is that range shifted right by 6 bits)
+ return false;
+ }
+
+ if (remainingBytes == 2 && currentDecodeBits >= 0x110)
+ {
+ // this is going to be out of the upper Unicode bound 0x10FFFF (0x110 is 0x110000 shifted right by 12 bits)
+ return false;
+ }
+
+ reader = nextItr;
+ if (byteCount - remainingBytes == 2)
+ {
+ byte2 = nextByte;
+ }
+ else if (byteCount - remainingBytes == 3)
+ {
+ byte3 = nextByte;
+ }
+ else if (byteCount - remainingBytes == 4)
+ {
+ byte4 = nextByte;
+ }
+ }
+
+ if (currentDecodeBits < expectValueMin)
+ {
+ // overlong encoding (e.g. using 2 bytes to encode something that only needed 1).
+ return false;
+ }
+
+ // all bytes are verified, write the raw UTF-8 bytes to the output
+ if (byteCount > 0)
+ {
+ buffer[writer++] = (byte)byte1;
+ }
+ if (byteCount > 1)
+ {
+ buffer[writer++] = (byte)byte2;
+ }
+ if (byteCount > 2)
+ {
+ buffer[writer++] = (byte)byte3;
+ }
+ if (byteCount > 3)
+ {
+ buffer[writer++] = (byte)byte4;
+ }
+
+ return true;
+ }
+
+ private static void Copy(int begin, int end, ref int writer, byte[] buffer)
+ {
+ for (var source = begin; source != end; source++) // forward, byte-by-byte copy of [begin, end) to the write cursor
+ {
+ buffer[writer++] = buffer[source];
+ }
+ }
+
+ ///
+ /// Read one percent-encoding (%XX) at the iterator and try to unescape it.
+ ///
+ /// The operation first reads the byte the iterator points at and
+ /// moves the iterator one byte forward. If that byte is % the
+ /// following two bytes are scanned. If both are hexadecimal digits
+ /// (and the pair is not 2F) the iterator is moved past them and the
+ /// combined value is returned.
+ ///
+ /// If the first byte is not % the iterator has still been moved
+ /// one byte forward, and -1 is returned.
+ ///
+ /// If the following two bytes can't be unescaped (not hex, end of the path,
+ /// or the pair is 2F which SkipUnescape rejects) the iterator is left
+ /// just past the % and -1 is returned.
+ ///
+ /// scan: index of the byte to read. It is not compared with end before the first read. end: exclusive end used for the hex digits.
+ /// buffer: the byte array being scanned.
+ /// Returns the unescaped byte value (0-255) on success, otherwise -1.
+ private static int UnescapePercentEncoding(ref int scan, int end, byte[] buffer)
+ {
+ if (buffer[scan++] != '%')
+ {
+ return -1;
+ }
+ // scan is now just past the '%'; probe looks ahead so that scan only moves further on success
+ var probe = scan;
+
+ int value1 = ReadHex(ref probe, end, buffer);
+ if (value1 == -1)
+ {
+ return -1;
+ }
+
+ int value2 = ReadHex(ref probe, end, buffer);
+ if (value2 == -1)
+ {
+ return -1;
+ }
+
+ if (SkipUnescape(value1, value2))
+ {
+ return -1;
+ }
+ // commit the look-ahead and combine the high and low hex digits into one byte value
+ scan = probe;
+ return (value1 << 4) + value2;
+ }
+
+ ///
+ /// Read the byte at scan and convert it from an ASCII hexadecimal digit to its numeric value.
+ ///
+ /// When scan has already reached end nothing is read and scan stays where it is;
+ /// otherwise scan is advanced by one byte whether or not that byte was a hex digit.
+ ///
+ /// scan: index of the byte to read. end: index at which scanning must stop (exclusive).
+ /// buffer: the byte array being scanned.
+ /// Returns the digit value (0-15), or -1 when scan equals end or the byte is not 0-9, A-F or a-f.
+ private static int ReadHex(ref int scan, int end, byte[] buffer)
+ {
+ // Nothing left to read: report failure without moving the cursor.
+ if (scan == end)
+ {
+ return -1;
+ }
+
+ // Consume exactly one byte; the cursor advances even if it is not a hex digit.
+ int candidate = buffer[scan++];
+
+ // decimal digits map to 0-9
+ if (candidate >= '0' && candidate <= '9')
+ {
+ return candidate - '0';
+ }
+
+ // upper-case letters map to 10-15
+ if (candidate >= 'A' && candidate <= 'F')
+ {
+ return candidate - 'A' + 10;
+ }
+
+ // lower-case letters map to 10-15; anything else is not a hex digit
+ if (candidate >= 'a' && candidate <= 'f')
+ {
+ return candidate - 'a' + 10;
+ }
+ return -1;
+ }
+
+ private static bool SkipUnescape(int value1, int value2)
+ {
+ // value1 and value2 are the high and low hex digits of a %XX escape.
+ // The only pair that is skipped is 2 and F (15): %2F, the encoded '/',
+ // which this class leaves encoded.
+ const int slashHighDigit = 0x2;
+ const int slashLowDigit = 0xF;
+
+ return value1 == slashHighDigit && value2 == slashLowDigit;
}
}
}
diff --git a/src/Microsoft.Net.Http.Server/RequestProcessing/UrlPathDecoder.cs b/src/Microsoft.Net.Http.Server/RequestProcessing/UrlPathDecoder.cs
deleted file mode 100644
index 7086029810..0000000000
--- a/src/Microsoft.Net.Http.Server/RequestProcessing/UrlPathDecoder.cs
+++ /dev/null
@@ -1,313 +0,0 @@
-// Copyright (c) .NET Foundation. All rights reserved.
-// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
-
-using System;
-
-namespace Microsoft.Net.Http.Server
-{
- public class UrlPathDecoder
- {
- ///
- /// Unescape a given path string which may contain escaped char.
- ///
- /// The raw path string to be unescaped
- /// The unescaped path string
- public static ArraySegment Unescape(ArraySegment rawPath)
- {
- // the slot to read the input
- var reader = rawPath.Offset;
-
- // the slot to write the unescaped byte
- var writer = rawPath.Offset;
-
- // the end of the path
- var end = rawPath.Offset + rawPath.Count;
-
- // the byte array
- var buffer = rawPath.Array;
-
- while (true)
- {
- if (reader == end)
- {
- break;
- }
-
- if (rawPath.Array[reader] == '%')
- {
- var decodeReader = reader;
-
- // If decoding process succeeds, the writer iterator will be moved
- // to the next write-ready location. On the other hand if the scanned
- // percent-encodings cannot be interpreted as sequence of UTF-8 octets,
- // these bytes should be copied to output as is.
- // The decodeReader iterator is always moved to the first byte not yet
- // be scanned after the process. A failed decoding means the chars
- // between the reader and decodeReader can be copied to output untouched.
- if (!DecodeCore(ref decodeReader, ref writer, end, buffer))
- {
- Copy(reader, decodeReader, ref writer, buffer);
- }
-
- reader = decodeReader;
- }
- else
- {
- buffer[writer++] = buffer[reader++];
- }
- }
-
- return new ArraySegment(buffer, rawPath.Offset, writer - rawPath.Offset);
- }
-
- ///
- /// Unescape the percent-encodings
- ///
- /// The iterator point to the first % char
- /// The place to write to
- /// The byte array
- private static bool DecodeCore(ref int reader, ref int writer, int end, byte[] buffer)
- {
- // preserves the original head. if the percent-encodings cannot be interpreted as sequence of UTF-8 octets,
- // bytes from this till the last scanned one will be copied to the memory pointed by writer.
- var byte1 = UnescapePercentEncoding(ref reader, end, buffer);
-
- if (byte1 == 0)
- {
- throw new InvalidOperationException("The path contains null characters.");
- }
-
- if (byte1 == -1)
- {
- return false;
- }
-
- if (byte1 <= 0x7F)
- {
- // first byte < U+007f, it is a single byte ASCII
- buffer[writer++] = (byte)byte1;
- return true;
- }
-
- int byte2 = 0, byte3 = 0, byte4 = 0;
-
- // anticipate more bytes
- var currentDecodeBits = 0;
- var byteCount = 1;
- var expectValueMin = 0;
- if ((byte1 & 0xE0) == 0xC0)
- {
- // 110x xxxx, expect one more byte
- currentDecodeBits = byte1 & 0x1F;
- byteCount = 2;
- expectValueMin = 0x80;
- }
- else if ((byte1 & 0xF0) == 0xE0)
- {
- // 1110 xxxx, expect two more bytes
- currentDecodeBits = byte1 & 0x0F;
- byteCount = 3;
- expectValueMin = 0x800;
- }
- else if ((byte1 & 0xF8) == 0xF0)
- {
- // 1111 0xxx, expect three more bytes
- currentDecodeBits = byte1 & 0x07;
- byteCount = 4;
- expectValueMin = 0x10000;
- }
- else
- {
- // invalid first byte
- return false;
- }
-
- var remainingBytes = byteCount - 1;
- while (remainingBytes > 0)
- {
- // read following three chars
- if (reader == buffer.Length)
- {
- return false;
- }
-
- var nextItr = reader;
- var nextByte = UnescapePercentEncoding(ref nextItr, end, buffer);
- if (nextByte == -1)
- {
- return false;
- }
-
- if ((nextByte & 0xC0) != 0x80)
- {
- // the follow up byte is not in form of 10xx xxxx
- return false;
- }
-
- currentDecodeBits = (currentDecodeBits << 6) | (nextByte & 0x3F);
- remainingBytes--;
-
- if (remainingBytes == 1 && currentDecodeBits >= 0x360 && currentDecodeBits <= 0x37F)
- {
- // this is going to end up in the range of 0xD800-0xDFFF UTF-16 surrogates that
- // are not allowed in UTF-8;
- return false;
- }
-
- if (remainingBytes == 2 && currentDecodeBits >= 0x110)
- {
- // this is going to be out of the upper Unicode bound 0x10FFFF.
- return false;
- }
-
- reader = nextItr;
- if (byteCount - remainingBytes == 2)
- {
- byte2 = nextByte;
- }
- else if (byteCount - remainingBytes == 3)
- {
- byte3 = nextByte;
- }
- else if (byteCount - remainingBytes == 4)
- {
- byte4 = nextByte;
- }
- }
-
- if (currentDecodeBits < expectValueMin)
- {
- // overlong encoding (e.g. using 2 bytes to encode something that only needed 1).
- return false;
- }
-
- // all bytes are verified, write to the output
- if (byteCount > 0)
- {
- buffer[writer++] = (byte)byte1;
- }
- if (byteCount > 1)
- {
- buffer[writer++] = (byte)byte2;
- }
- if (byteCount > 2)
- {
- buffer[writer++] = (byte)byte3;
- }
- if (byteCount > 3)
- {
- buffer[writer++] = (byte)byte4;
- }
-
- return true;
- }
-
- private static void Copy(int begin, int end, ref int writer, byte[] buffer)
- {
- while (begin != end)
- {
- buffer[writer++] = buffer[begin++];
- }
- }
-
- ///
- /// Read the percent-encoding and try unescape it.
- ///
- /// The operation first peek at the character the
- /// iterator points at. If it is % the is then
- /// moved on to scan the following to characters. If the two following
- /// characters are hexadecimal literals they will be unescaped and the
- /// value will be returned.
- ///
- /// If the first character is not % the iterator
- /// will be removed beyond the location of % and -1 will be returned.
- ///
- /// If the following two characters can't be successfully unescaped the
- /// iterator will be move behind the % and -1
- /// will be returned.
- ///
- /// The value to read
- /// The byte array
- /// The unescaped byte if success. Otherwise return -1.
- private static int UnescapePercentEncoding(ref int scan, int end, byte[] buffer)
- {
- if (buffer[scan++] != '%')
- {
- return -1;
- }
-
- var probe = scan;
-
- int value1 = ReadHex(ref probe, end, buffer);
- if (value1 == -1)
- {
- return -1;
- }
-
- int value2 = ReadHex(ref probe, end, buffer);
- if (value2 == -1)
- {
- return -1;
- }
-
- if (SkipUnescape(value1, value2))
- {
- return -1;
- }
-
- scan = probe;
- return (value1 << 4) + value2;
- }
-
- ///
- /// Read the next char and convert it into hexadecimal value.
- ///
- /// The iterator will be moved to the next
- /// byte no matter no matter whether the operation successes.
- ///
- /// The value to read
- /// The byte array
- /// The hexadecimal value if successes, otherwise -1.
- private static int ReadHex(ref int scan, int end, byte[] buffer)
- {
- if (scan == end)
- {
- return -1;
- }
-
- var value = buffer[scan++];
- var isHead = (((value >= '0') && (value <= '9')) ||
- ((value >= 'A') && (value <= 'F')) ||
- ((value >= 'a') && (value <= 'f')));
-
- if (!isHead)
- {
- return -1;
- }
-
- if (value <= '9')
- {
- return value - '0';
- }
- else if (value <= 'F')
- {
- return (value - 'A') + 10;
- }
- else // a - f
- {
- return (value - 'a') + 10;
- }
- }
-
- private static bool SkipUnescape(int value1, int value2)
- {
- // skip %2F
- if (value1 == 2 && value2 == 15)
- {
- return true;
- }
-
- return false;
- }
- }
-}