Improve URL path decoding and unescaping components
1. Rename UrlInByte to RawUrlHelper and convert it to a static class. 2. Combine UrlPathDecoder and RequestUriBuilder and convert the latter to a static utility class.
This commit is contained in:
parent
ff97efe0d2
commit
b99f98dffb
|
|
@ -1,27 +1,36 @@
|
|||
using System;
|
||||
using System.Text;
|
||||
// Copyright (c) Microsoft Open Technologies, Inc.
|
||||
// All Rights Reserved
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING
|
||||
// WITHOUT LIMITATION ANY IMPLIED WARRANTIES OR CONDITIONS OF
|
||||
// TITLE, FITNESS FOR A PARTICULAR PURPOSE, MERCHANTABLITY OR
|
||||
// NON-INFRINGEMENT.
|
||||
// See the Apache 2 License for the specific language governing
|
||||
// permissions and limitations under the License.
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// <copyright file="SslStatus.cs" company="Microsoft">
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// </copyright>
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
using System;
|
||||
|
||||
namespace Microsoft.Net.Http.Server
|
||||
{
|
||||
internal class UrlInByte
|
||||
internal static class RawUrlHelper
|
||||
{
|
||||
private static string HTTP_SCHEME = "http://";
|
||||
private static string HTTPS_SCHEME = "https://";
|
||||
|
||||
private readonly byte[] _raw;
|
||||
|
||||
public UrlInByte(byte[] raw)
|
||||
{
|
||||
_raw = raw;
|
||||
Path = LocalPath(_raw);
|
||||
}
|
||||
|
||||
public ArraySegment<byte> Path { get; }
|
||||
|
||||
/// <summary>
|
||||
/// Find the segment of the URI byte array which represents the path.
|
||||
/// </summary>
|
||||
private static ArraySegment<byte> LocalPath(byte[] raw)
|
||||
public static ArraySegment<byte> GetPath(byte[] raw)
|
||||
{
|
||||
// performance
|
||||
var pathStartIndex = 0;
|
||||
|
|
@ -84,27 +93,52 @@ namespace Microsoft.Net.Http.Server
|
|||
/// <returns>Length of the matched bytes, 0 if it is not matched.</returns>
|
||||
private static int FindHttpOrHttps(byte[] raw)
{
    // "http://" (7 bytes) is the shortest prefix that can match. Checking the
    // length first keeps the byte-wise comparisons below inside the array for
    // short inputs such as "h" or "http".
    if (raw.Length < 7)
    {
        return 0;
    }

    // Compare the scheme letters byte by byte, accepting either ASCII case, so
    // no string has to be allocated just to test the prefix.
    if ((raw[0] != 'h' && raw[0] != 'H') ||
        (raw[1] != 't' && raw[1] != 'T') ||
        (raw[2] != 't' && raw[2] != 'T') ||
        (raw[3] != 'p' && raw[3] != 'P'))
    {
        return 0;
    }

    if (raw[4] == ':')
    {
        // "http://"
        return (raw[5] == '/' && raw[6] == '/') ? 7 : 0;
    }

    if (raw[4] == 's' || raw[4] == 'S')
    {
        // "https://" is one byte longer than "http://", so raw[7] needs its
        // own length check.
        if (raw.Length < 8)
        {
            return 0;
        }

        return (raw[5] == ':' && raw[6] == '/' && raw[7] == '/') ? 8 : 0;
    }

    return 0;
}
|
||||
|
||||
private static int Find(byte[] raw, int begin, char target)
|
||||
|
|
@ -72,7 +72,7 @@ namespace Microsoft.Net.Http.Server
|
|||
var prefix = requestContext.Server.Settings.UrlPrefixes.GetPrefix((int)nativeRequestContext.UrlContext);
|
||||
|
||||
var rawUrlInBytes = _nativeRequestContext.GetRawUrlInBytes();
|
||||
var originalPath = RequestUriBuilder.GetRequestPath(rawUrlInBytes, RequestContext.Logger);
|
||||
var originalPath = RequestUriBuilder.DecodeAndUnescapePath(rawUrlInBytes);
|
||||
|
||||
// 'OPTIONS * HTTP/1.1'
|
||||
if (KnownMethod == HttpApi.HTTP_VERB.HttpVerbOPTIONS && string.Equals(RawUrl, "*", StringComparison.Ordinal))
|
||||
|
|
@ -179,7 +179,7 @@ namespace Microsoft.Net.Http.Server
|
|||
public string Path { get; }
|
||||
|
||||
public bool IsHttps => SslStatus != SslStatus.Insecure;
|
||||
|
||||
|
||||
public string RawUrl { get; }
|
||||
|
||||
public Version ProtocolVersion { get; }
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright (c) Microsoft Open Technologies, Inc.
|
||||
// Copyright (c) Microsoft Open Technologies, Inc.
|
||||
// All Rights Reserved
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
|
|
@ -21,9 +21,8 @@
|
|||
// </copyright>
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
using System.Diagnostics;
|
||||
using System;
|
||||
using System.Text;
|
||||
using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace Microsoft.Net.Http.Server
|
||||
{
|
||||
|
|
@ -31,32 +30,333 @@ namespace Microsoft.Net.Http.Server
|
|||
// we also can't just use the raw Uri, since http.sys supports not only UTF-8, but also ANSI/DBCS and
|
||||
// Unicode code points. System.Uri only supports UTF-8.
|
||||
// The purpose of this class is to decode all UTF-8 percent encoded characters, with the
|
||||
// exception of %2F ('/'), which is left encoded.
|
||||
internal static class RequestUriBuilder
{
    // Strict UTF-8: no byte order mark is emitted and invalid byte sequences
    // cause an exception instead of being replaced.
    private static readonly Encoding UTF8 = new UTF8Encoding(
        encoderShouldEmitUTF8Identifier: false,
        throwOnInvalidBytes: true);

    /// <summary>
    /// Extracts the path from a raw request URL, unescapes its percent-encoded
    /// UTF-8 sequences (except %2F, which stays encoded) and returns it as a string.
    /// </summary>
    /// <param name="urlInBytes">
    /// The raw request URL. The path is unescaped in place inside the segment
    /// returned by <c>RawUrlHelper.GetPath</c>, so this array may be modified
    /// (NOTE(review): assumes GetPath returns a segment over this same array - confirm).
    /// </param>
    /// <returns>The unescaped path decoded as UTF-8.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="urlInBytes"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="urlInBytes"/> is empty.</exception>
    /// <exception cref="InvalidOperationException">The path contains an escaped null character (%00).</exception>
    public static string DecodeAndUnescapePath(byte[] urlInBytes)
    {
        if (urlInBytes == null)
        {
            throw new ArgumentNullException(nameof(urlInBytes));
        }

        if (urlInBytes.Length == 0)
        {
            throw new ArgumentException("Length of the URL cannot be zero.", nameof(urlInBytes));
        }

        var rawPath = RawUrlHelper.GetPath(urlInBytes);

        var unescapedPath = Unescape(rawPath);

        return UTF8.GetString(unescapedPath.Array, unescapedPath.Offset, unescapedPath.Count);
    }

    /// <summary>
    /// Unescapes, in place, the percent-encoded UTF-8 sequences of a raw path.
    /// Escapes that do not form a well-formed UTF-8 character, and %2F, are
    /// copied to the output unchanged.
    /// </summary>
    /// <param name="rawPath">The segment of the byte array that holds the raw path.</param>
    /// <returns>
    /// A segment over the same array, starting at the same offset, that covers
    /// the unescaped bytes. It is never longer than <paramref name="rawPath"/>.
    /// </returns>
    /// <exception cref="InvalidOperationException">The path contains an escaped null character (%00).</exception>
    private static ArraySegment<byte> Unescape(ArraySegment<byte> rawPath)
    {
        var buffer = rawPath.Array;
        var begin = rawPath.Offset;

        // exclusive end of the path inside the buffer
        var end = begin + rawPath.Count;

        // reader is the next byte to consume, writer the next slot to fill;
        // writer never gets ahead of reader, so decoding in place is safe
        var reader = begin;
        var writer = begin;

        while (reader < end)
        {
            if (buffer[reader] != '%')
            {
                buffer[writer++] = buffer[reader++];
                continue;
            }

            var decodeReader = reader;

            // On success DecodeCore has written the decoded bytes and advanced
            // the writer. On failure it has written nothing, and the bytes it
            // scanned (reader up to decodeReader) are copied through untouched.
            if (!DecodeCore(ref decodeReader, ref writer, end, buffer))
            {
                Copy(reader, decodeReader, ref writer, buffer);
            }

            reader = decodeReader;
        }

        return new ArraySegment<byte>(buffer, begin, writer - begin);
    }

    /// <summary>
    /// Decodes one percent-encoded UTF-8 character starting at <paramref name="reader"/>.
    /// </summary>
    /// <param name="reader">
    /// On entry, the index of a '%' byte. On return, the index of the first byte
    /// that has not been consumed. When the method returns false the caller must
    /// copy the bytes between the entry value and the returned value verbatim.
    /// </param>
    /// <param name="writer">The index to write decoded bytes to; advanced only on success.</param>
    /// <param name="end">The exclusive end index of the path inside <paramref name="buffer"/>.</param>
    /// <param name="buffer">The byte array holding the path; read and written in place.</param>
    /// <returns>true if a complete, well-formed UTF-8 sequence was decoded and written; otherwise false.</returns>
    /// <exception cref="InvalidOperationException">The first escape decodes to a null byte (%00).</exception>
    private static bool DecodeCore(ref int reader, ref int writer, int end, byte[] buffer)
    {
        var byte1 = UnescapePercentEncoding(ref reader, end, buffer);

        if (byte1 == 0)
        {
            throw new InvalidOperationException("The path contains null characters.");
        }

        if (byte1 == -1)
        {
            return false;
        }

        if (byte1 <= 0x7F)
        {
            // single byte ASCII
            buffer[writer++] = (byte)byte1;
            return true;
        }

        int byte2 = 0, byte3 = 0, byte4 = 0;

        // Classify the lead byte: how many bytes the sequence has, the payload
        // bits the lead byte carries, and the smallest code point that really
        // needs that many bytes (used to reject overlong encodings).
        int currentDecodeBits;
        int byteCount;
        int expectValueMin;
        if ((byte1 & 0xE0) == 0xC0)
        {
            // 110x xxxx, expect one more byte
            currentDecodeBits = byte1 & 0x1F;
            byteCount = 2;
            expectValueMin = 0x80;
        }
        else if ((byte1 & 0xF0) == 0xE0)
        {
            // 1110 xxxx, expect two more bytes
            currentDecodeBits = byte1 & 0x0F;
            byteCount = 3;
            expectValueMin = 0x800;
        }
        else if ((byte1 & 0xF8) == 0xF0)
        {
            // 1111 0xxx, expect three more bytes
            currentDecodeBits = byte1 & 0x07;
            byteCount = 4;
            expectValueMin = 0x10000;
        }
        else
        {
            // invalid first byte
            return false;
        }

        var remainingBytes = byteCount - 1;
        while (remainingBytes > 0)
        {
            // Stop at the end of the path segment, not at the end of the array:
            // bytes after the segment (for example the query string) must never
            // be consumed as continuation bytes.
            if (reader >= end)
            {
                return false;
            }

            var nextItr = reader;
            var nextByte = UnescapePercentEncoding(ref nextItr, end, buffer);
            if (nextByte == -1)
            {
                return false;
            }

            if ((nextByte & 0xC0) != 0x80)
            {
                // the follow up byte is not in form of 10xx xxxx
                return false;
            }

            currentDecodeBits = (currentDecodeBits << 6) | (nextByte & 0x3F);
            remainingBytes--;

            if (remainingBytes == 1 && currentDecodeBits >= 0x360 && currentDecodeBits <= 0x37F)
            {
                // this is going to end up in the range of 0xD800-0xDFFF UTF-16 surrogates that
                // are not allowed in UTF-8
                return false;
            }

            if (remainingBytes == 2 && currentDecodeBits >= 0x110)
            {
                // this is going to be out of the upper Unicode bound 0x10FFFF
                return false;
            }

            // the continuation byte is accepted, consume it
            reader = nextItr;

            var position = byteCount - remainingBytes;
            if (position == 2)
            {
                byte2 = nextByte;
            }
            else if (position == 3)
            {
                byte3 = nextByte;
            }
            else if (position == 4)
            {
                byte4 = nextByte;
            }
        }

        if (currentDecodeBits < expectValueMin)
        {
            // overlong encoding (e.g. using 2 bytes to encode something that only needed 1)
            return false;
        }

        // all bytes are verified, write them to the output
        buffer[writer++] = (byte)byte1;
        buffer[writer++] = (byte)byte2;

        if (byteCount > 2)
        {
            buffer[writer++] = (byte)byte3;
        }

        if (byteCount > 3)
        {
            buffer[writer++] = (byte)byte4;
        }

        return true;
    }

    /// <summary>
    /// Copies the bytes in [begin, end) to the position of <paramref name="writer"/>
    /// inside the same buffer, advancing the writer past the copied bytes.
    /// </summary>
    private static void Copy(int begin, int end, ref int writer, byte[] buffer)
    {
        while (begin != end)
        {
            buffer[writer++] = buffer[begin++];
        }
    }

    /// <summary>
    /// Reads one percent-encoded triplet ("%XX") and returns the byte it encodes.
    ///
    /// If <paramref name="scan"/> is already at <paramref name="end"/>, nothing is
    /// read, the iterator is left unchanged and -1 is returned.
    ///
    /// If the byte at <paramref name="scan"/> is not '%', the iterator is moved
    /// past that byte and -1 is returned.
    ///
    /// If the two bytes after the '%' are not both hexadecimal digits inside the
    /// path, or the triplet is %2F, the iterator is left pointing just after the
    /// '%' and -1 is returned.
    ///
    /// Otherwise the iterator is moved past the triplet.
    /// </summary>
    /// <param name="scan">The index to read from.</param>
    /// <param name="end">The exclusive end index of the path inside <paramref name="buffer"/>.</param>
    /// <param name="buffer">The byte array</param>
    /// <returns>The unescaped byte on success; otherwise -1.</returns>
    private static int UnescapePercentEncoding(ref int scan, int end, byte[] buffer)
    {
        // never look at bytes that lie beyond the path segment
        if (scan >= end)
        {
            return -1;
        }

        if (buffer[scan++] != '%')
        {
            return -1;
        }

        var probe = scan;

        int value1 = ReadHex(ref probe, end, buffer);
        if (value1 == -1)
        {
            return -1;
        }

        int value2 = ReadHex(ref probe, end, buffer);
        if (value2 == -1)
        {
            return -1;
        }

        if (SkipUnescape(value1, value2))
        {
            return -1;
        }

        scan = probe;
        return (value1 << 4) + value2;
    }

    /// <summary>
    /// Reads the next byte and converts it from an ASCII hexadecimal digit to its value.
    ///
    /// The <paramref name="scan"/> iterator is moved to the next byte whether or
    /// not the byte is a hexadecimal digit; it is left unchanged only when it is
    /// already at <paramref name="end"/>.
    /// </summary>
    /// <param name="scan">The index to read from.</param>
    /// <param name="end">The exclusive end index of the path inside <paramref name="buffer"/>.</param>
    /// <param name="buffer">The byte array</param>
    /// <returns>The value of the hexadecimal digit (0-15) on success; otherwise -1.</returns>
    private static int ReadHex(ref int scan, int end, byte[] buffer)
    {
        if (scan == end)
        {
            return -1;
        }

        var value = buffer[scan++];

        if (value >= '0' && value <= '9')
        {
            return value - '0';
        }

        if (value >= 'A' && value <= 'F')
        {
            return (value - 'A') + 10;
        }

        if (value >= 'a' && value <= 'f')
        {
            return (value - 'a') + 10;
        }

        return -1;
    }

    /// <summary>
    /// Tells whether the escape made of the two hexadecimal digit values must be
    /// left encoded. Only %2F ('/') is skipped.
    /// </summary>
    private static bool SkipUnescape(int value1, int value2)
    {
        // skip %2F
        return value1 == 2 && value2 == 15;
    }
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,313 +0,0 @@
|
|||
// Copyright (c) .NET Foundation. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System;
|
||||
|
||||
namespace Microsoft.Net.Http.Server
|
||||
{
|
||||
public class UrlPathDecoder
{
    /// <summary>
    /// Unescapes, in place, the percent-encoded UTF-8 sequences of a raw path.
    /// Escapes that do not form a well-formed UTF-8 character, and %2F, are
    /// copied to the output unchanged.
    /// </summary>
    /// <param name="rawPath">The segment of the byte array that holds the raw path.</param>
    /// <returns>
    /// A segment over the same array, starting at the same offset, that covers
    /// the unescaped bytes. It is never longer than <paramref name="rawPath"/>.
    /// </returns>
    /// <exception cref="InvalidOperationException">The path contains an escaped null character (%00).</exception>
    public static ArraySegment<byte> Unescape(ArraySegment<byte> rawPath)
    {
        var buffer = rawPath.Array;
        var begin = rawPath.Offset;

        // exclusive end of the path inside the buffer
        var end = begin + rawPath.Count;

        // reader is the next byte to consume, writer the next slot to fill;
        // writer never gets ahead of reader, so decoding in place is safe
        var reader = begin;
        var writer = begin;

        while (reader < end)
        {
            if (buffer[reader] != '%')
            {
                buffer[writer++] = buffer[reader++];
                continue;
            }

            var decodeReader = reader;

            // On success DecodeCore has written the decoded bytes and advanced
            // the writer. On failure it has written nothing, and the bytes it
            // scanned (reader up to decodeReader) are copied through untouched.
            if (!DecodeCore(ref decodeReader, ref writer, end, buffer))
            {
                Copy(reader, decodeReader, ref writer, buffer);
            }

            reader = decodeReader;
        }

        return new ArraySegment<byte>(buffer, begin, writer - begin);
    }

    /// <summary>
    /// Decodes one percent-encoded UTF-8 character starting at <paramref name="reader"/>.
    /// </summary>
    /// <param name="reader">
    /// On entry, the index of a '%' byte. On return, the index of the first byte
    /// that has not been consumed. When the method returns false the caller must
    /// copy the bytes between the entry value and the returned value verbatim.
    /// </param>
    /// <param name="writer">The index to write decoded bytes to; advanced only on success.</param>
    /// <param name="end">The exclusive end index of the path inside <paramref name="buffer"/>.</param>
    /// <param name="buffer">The byte array holding the path; read and written in place.</param>
    /// <returns>true if a complete, well-formed UTF-8 sequence was decoded and written; otherwise false.</returns>
    /// <exception cref="InvalidOperationException">The first escape decodes to a null byte (%00).</exception>
    private static bool DecodeCore(ref int reader, ref int writer, int end, byte[] buffer)
    {
        var byte1 = UnescapePercentEncoding(ref reader, end, buffer);

        if (byte1 == 0)
        {
            throw new InvalidOperationException("The path contains null characters.");
        }

        if (byte1 == -1)
        {
            return false;
        }

        if (byte1 <= 0x7F)
        {
            // single byte ASCII
            buffer[writer++] = (byte)byte1;
            return true;
        }

        int byte2 = 0, byte3 = 0, byte4 = 0;

        // Classify the lead byte: how many bytes the sequence has, the payload
        // bits the lead byte carries, and the smallest code point that really
        // needs that many bytes (used to reject overlong encodings).
        int currentDecodeBits;
        int byteCount;
        int expectValueMin;
        if ((byte1 & 0xE0) == 0xC0)
        {
            // 110x xxxx, expect one more byte
            currentDecodeBits = byte1 & 0x1F;
            byteCount = 2;
            expectValueMin = 0x80;
        }
        else if ((byte1 & 0xF0) == 0xE0)
        {
            // 1110 xxxx, expect two more bytes
            currentDecodeBits = byte1 & 0x0F;
            byteCount = 3;
            expectValueMin = 0x800;
        }
        else if ((byte1 & 0xF8) == 0xF0)
        {
            // 1111 0xxx, expect three more bytes
            currentDecodeBits = byte1 & 0x07;
            byteCount = 4;
            expectValueMin = 0x10000;
        }
        else
        {
            // invalid first byte
            return false;
        }

        var remainingBytes = byteCount - 1;
        while (remainingBytes > 0)
        {
            // Stop at the end of the path segment, not at the end of the array:
            // bytes after the segment (for example the query string) must never
            // be consumed as continuation bytes.
            if (reader >= end)
            {
                return false;
            }

            var nextItr = reader;
            var nextByte = UnescapePercentEncoding(ref nextItr, end, buffer);
            if (nextByte == -1)
            {
                return false;
            }

            if ((nextByte & 0xC0) != 0x80)
            {
                // the follow up byte is not in form of 10xx xxxx
                return false;
            }

            currentDecodeBits = (currentDecodeBits << 6) | (nextByte & 0x3F);
            remainingBytes--;

            if (remainingBytes == 1 && currentDecodeBits >= 0x360 && currentDecodeBits <= 0x37F)
            {
                // this is going to end up in the range of 0xD800-0xDFFF UTF-16 surrogates that
                // are not allowed in UTF-8
                return false;
            }

            if (remainingBytes == 2 && currentDecodeBits >= 0x110)
            {
                // this is going to be out of the upper Unicode bound 0x10FFFF
                return false;
            }

            // the continuation byte is accepted, consume it
            reader = nextItr;

            var position = byteCount - remainingBytes;
            if (position == 2)
            {
                byte2 = nextByte;
            }
            else if (position == 3)
            {
                byte3 = nextByte;
            }
            else if (position == 4)
            {
                byte4 = nextByte;
            }
        }

        if (currentDecodeBits < expectValueMin)
        {
            // overlong encoding (e.g. using 2 bytes to encode something that only needed 1)
            return false;
        }

        // all bytes are verified, write them to the output
        buffer[writer++] = (byte)byte1;
        buffer[writer++] = (byte)byte2;

        if (byteCount > 2)
        {
            buffer[writer++] = (byte)byte3;
        }

        if (byteCount > 3)
        {
            buffer[writer++] = (byte)byte4;
        }

        return true;
    }

    /// <summary>
    /// Copies the bytes in [begin, end) to the position of <paramref name="writer"/>
    /// inside the same buffer, advancing the writer past the copied bytes.
    /// </summary>
    private static void Copy(int begin, int end, ref int writer, byte[] buffer)
    {
        while (begin != end)
        {
            buffer[writer++] = buffer[begin++];
        }
    }

    /// <summary>
    /// Reads one percent-encoded triplet ("%XX") and returns the byte it encodes.
    ///
    /// If <paramref name="scan"/> is already at <paramref name="end"/>, nothing is
    /// read, the iterator is left unchanged and -1 is returned.
    ///
    /// If the byte at <paramref name="scan"/> is not '%', the iterator is moved
    /// past that byte and -1 is returned.
    ///
    /// If the two bytes after the '%' are not both hexadecimal digits inside the
    /// path, or the triplet is %2F, the iterator is left pointing just after the
    /// '%' and -1 is returned.
    ///
    /// Otherwise the iterator is moved past the triplet.
    /// </summary>
    /// <param name="scan">The index to read from.</param>
    /// <param name="end">The exclusive end index of the path inside <paramref name="buffer"/>.</param>
    /// <param name="buffer">The byte array</param>
    /// <returns>The unescaped byte on success; otherwise -1.</returns>
    private static int UnescapePercentEncoding(ref int scan, int end, byte[] buffer)
    {
        // never look at bytes that lie beyond the path segment
        if (scan >= end)
        {
            return -1;
        }

        if (buffer[scan++] != '%')
        {
            return -1;
        }

        var probe = scan;

        int value1 = ReadHex(ref probe, end, buffer);
        if (value1 == -1)
        {
            return -1;
        }

        int value2 = ReadHex(ref probe, end, buffer);
        if (value2 == -1)
        {
            return -1;
        }

        if (SkipUnescape(value1, value2))
        {
            return -1;
        }

        scan = probe;
        return (value1 << 4) + value2;
    }

    /// <summary>
    /// Reads the next byte and converts it from an ASCII hexadecimal digit to its value.
    ///
    /// The <paramref name="scan"/> iterator is moved to the next byte whether or
    /// not the byte is a hexadecimal digit; it is left unchanged only when it is
    /// already at <paramref name="end"/>.
    /// </summary>
    /// <param name="scan">The index to read from.</param>
    /// <param name="end">The exclusive end index of the path inside <paramref name="buffer"/>.</param>
    /// <param name="buffer">The byte array</param>
    /// <returns>The value of the hexadecimal digit (0-15) on success; otherwise -1.</returns>
    private static int ReadHex(ref int scan, int end, byte[] buffer)
    {
        if (scan == end)
        {
            return -1;
        }

        var value = buffer[scan++];

        if (value >= '0' && value <= '9')
        {
            return value - '0';
        }

        if (value >= 'A' && value <= 'F')
        {
            return (value - 'A') + 10;
        }

        if (value >= 'a' && value <= 'f')
        {
            return (value - 'a') + 10;
        }

        return -1;
    }

    /// <summary>
    /// Tells whether the escape made of the two hexadecimal digit values must be
    /// left encoded. Only %2F ('/') is skipped.
    /// </summary>
    private static bool SkipUnescape(int value1, int value2)
    {
        // skip %2F
        return value1 == 2 && value2 == 15;
    }
}
|
||||
}
|
||||
Loading…
Reference in New Issue