#391 Migrate to System.Text.Encodings.Web
This commit is contained in:
parent
f177f0c760
commit
be4fb46281
|
|
@ -1,4 +1,3 @@
|
|||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio 14
|
||||
VisualStudioVersion = 14.0.23107.0
|
||||
|
|
@ -39,8 +38,6 @@ Project("{8BB2217D-0F2D-49D1-97BC-3654ED321F3B}") = "Microsoft.Extensions.WebEnc
|
|||
EndProject
|
||||
Project("{8BB2217D-0F2D-49D1-97BC-3654ED321F3B}") = "Microsoft.Extensions.WebEncoders.Tests", "test\Microsoft.Extensions.WebEncoders.Tests\Microsoft.Extensions.WebEncoders.Tests.xproj", "{7AE2731D-43CD-4CF8-850A-4914DE2CE930}"
|
||||
EndProject
|
||||
Project("{8BB2217D-0F2D-49D1-97BC-3654ED321F3B}") = "Microsoft.Extensions.WebEncoders.Core", "src\Microsoft.Extensions.WebEncoders.Core\Microsoft.Extensions.WebEncoders.Core.xproj", "{BE9112CB-D87D-4080-9CC3-24492D49CBE6}"
|
||||
EndProject
|
||||
Project("{8BB2217D-0F2D-49D1-97BC-3654ED321F3B}") = "Microsoft.AspNet.Html.Abstractions", "src\Microsoft.AspNet.Html.Abstractions\Microsoft.AspNet.Html.Abstractions.xproj", "{68A28E4A-3ADE-4187-9625-4FF185887CB3}"
|
||||
EndProject
|
||||
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "samples", "samples", "{982F09D8-621E-4872-BA7B-BBDEA47D1EFD}"
|
||||
|
|
@ -231,18 +228,6 @@ Global
|
|||
{7AE2731D-43CD-4CF8-850A-4914DE2CE930}.Release|Mixed Platforms.Build.0 = Release|Any CPU
|
||||
{7AE2731D-43CD-4CF8-850A-4914DE2CE930}.Release|x86.ActiveCfg = Release|Any CPU
|
||||
{7AE2731D-43CD-4CF8-850A-4914DE2CE930}.Release|x86.Build.0 = Release|Any CPU
|
||||
{BE9112CB-D87D-4080-9CC3-24492D49CBE6}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{BE9112CB-D87D-4080-9CC3-24492D49CBE6}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{BE9112CB-D87D-4080-9CC3-24492D49CBE6}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU
|
||||
{BE9112CB-D87D-4080-9CC3-24492D49CBE6}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU
|
||||
{BE9112CB-D87D-4080-9CC3-24492D49CBE6}.Debug|x86.ActiveCfg = Debug|Any CPU
|
||||
{BE9112CB-D87D-4080-9CC3-24492D49CBE6}.Debug|x86.Build.0 = Debug|Any CPU
|
||||
{BE9112CB-D87D-4080-9CC3-24492D49CBE6}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{BE9112CB-D87D-4080-9CC3-24492D49CBE6}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{BE9112CB-D87D-4080-9CC3-24492D49CBE6}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU
|
||||
{BE9112CB-D87D-4080-9CC3-24492D49CBE6}.Release|Mixed Platforms.Build.0 = Release|Any CPU
|
||||
{BE9112CB-D87D-4080-9CC3-24492D49CBE6}.Release|x86.ActiveCfg = Release|Any CPU
|
||||
{BE9112CB-D87D-4080-9CC3-24492D49CBE6}.Release|x86.Build.0 = Release|Any CPU
|
||||
{68A28E4A-3ADE-4187-9625-4FF185887CB3}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{68A28E4A-3ADE-4187-9625-4FF185887CB3}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{68A28E4A-3ADE-4187-9625-4FF185887CB3}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU
|
||||
|
|
@ -324,7 +309,6 @@ Global
|
|||
{E6BB7AD1-BD10-4A23-B780-F4A86ADF00D1} = {F31FF137-390C-49BF-A3BD-7C6ED3597C21}
|
||||
{DD2CE416-765E-4000-A03E-C2FF165DA1B6} = {A5A15F1C-885A-452A-A731-B0173DDBD913}
|
||||
{7AE2731D-43CD-4CF8-850A-4914DE2CE930} = {F31FF137-390C-49BF-A3BD-7C6ED3597C21}
|
||||
{BE9112CB-D87D-4080-9CC3-24492D49CBE6} = {A5A15F1C-885A-452A-A731-B0173DDBD913}
|
||||
{68A28E4A-3ADE-4187-9625-4FF185887CB3} = {A5A15F1C-885A-452A-A731-B0173DDBD913}
|
||||
{1D0764B4-1DEB-4232-A714-D4B7E846918A} = {982F09D8-621E-4872-BA7B-BBDEA47D1EFD}
|
||||
{2D187B88-94BD-4A39-AC97-F8F8B9363301} = {F31FF137-390C-49BF-A3BD-7C6ED3597C21}
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ using System;
|
|||
using System.Diagnostics;
|
||||
using System.Globalization;
|
||||
using System.IO;
|
||||
using Microsoft.Extensions.WebEncoders;
|
||||
using System.Text.Encodings.Web;
|
||||
|
||||
namespace Microsoft.AspNet.Html.Abstractions
|
||||
{
|
||||
|
|
@ -197,7 +197,7 @@ namespace Microsoft.AspNet.Html.Abstractions
|
|||
_value = value;
|
||||
}
|
||||
|
||||
public void WriteTo(TextWriter writer, IHtmlEncoder encoder)
|
||||
public void WriteTo(TextWriter writer, HtmlEncoder encoder)
|
||||
{
|
||||
writer.Write(_value);
|
||||
}
|
||||
|
|
@ -234,7 +234,7 @@ namespace Microsoft.AspNet.Html.Abstractions
|
|||
_args = args;
|
||||
}
|
||||
|
||||
public void WriteTo(TextWriter writer, IHtmlEncoder encoder)
|
||||
public void WriteTo(TextWriter writer, HtmlEncoder encoder)
|
||||
{
|
||||
if (writer == null)
|
||||
{
|
||||
|
|
@ -269,10 +269,10 @@ namespace Microsoft.AspNet.Html.Abstractions
|
|||
// https://msdn.microsoft.com/en-us/library/system.string.format(v=vs.110).aspx#Format6_Example
|
||||
private class EncodingFormatProvider : IFormatProvider, ICustomFormatter
|
||||
{
|
||||
private readonly IHtmlEncoder _encoder;
|
||||
private readonly HtmlEncoder _encoder;
|
||||
private readonly IFormatProvider _formatProvider;
|
||||
|
||||
public EncodingFormatProvider(IFormatProvider formatProvider, IHtmlEncoder encoder)
|
||||
public EncodingFormatProvider(IFormatProvider formatProvider, HtmlEncoder encoder)
|
||||
{
|
||||
Debug.Assert(formatProvider != null);
|
||||
Debug.Assert(encoder != null);
|
||||
|
|
@ -306,7 +306,7 @@ namespace Microsoft.AspNet.Html.Abstractions
|
|||
var result = customFormatter.Format(format, arg, _formatProvider);
|
||||
if (result != null)
|
||||
{
|
||||
return _encoder.HtmlEncode(result);
|
||||
return _encoder.Encode(result);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -320,7 +320,7 @@ namespace Microsoft.AspNet.Html.Abstractions
|
|||
var result = formattable.ToString(format, _formatProvider);
|
||||
if (result != null)
|
||||
{
|
||||
return _encoder.HtmlEncode(result);
|
||||
return _encoder.Encode(result);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -330,7 +330,7 @@ namespace Microsoft.AspNet.Html.Abstractions
|
|||
var result = arg.ToString();
|
||||
if (result != null)
|
||||
{
|
||||
return _encoder.HtmlEncode(result);
|
||||
return _encoder.Encode(result);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System.IO;
|
||||
using Microsoft.Extensions.WebEncoders;
|
||||
using System.Text.Encodings.Web;
|
||||
|
||||
namespace Microsoft.AspNet.Html.Abstractions
|
||||
{
|
||||
|
|
@ -16,7 +16,7 @@ namespace Microsoft.AspNet.Html.Abstractions
|
|||
/// to the specified <paramref name="writer"/>.
|
||||
/// </summary>
|
||||
/// <param name="writer">The <see cref="TextWriter"/> to which the content is written.</param>
|
||||
/// <param name="encoder">The <see cref="IHtmlEncoder"/> which encodes the content to be written.</param>
|
||||
void WriteTo(TextWriter writer, IHtmlEncoder encoder);
|
||||
/// <param name="encoder">The <see cref="HtmlEncoder"/> which encodes the content to be written.</param>
|
||||
void WriteTo(TextWriter writer, HtmlEncoder encoder);
|
||||
}
|
||||
}
|
||||
|
|
@ -9,10 +9,15 @@
|
|||
"warningsAsErrors": true
|
||||
},
|
||||
"dependencies": {
|
||||
"Microsoft.Extensions.WebEncoders.Core": "1.0.0-*"
|
||||
"System.Text.Encodings.Web": "4.0.0-beta-*"
|
||||
},
|
||||
"frameworks": {
|
||||
"net451": {},
|
||||
"net451": {
|
||||
"frameworkAssemblies": {
|
||||
"System.IO": "",
|
||||
"System.Runtime": ""
|
||||
}
|
||||
},
|
||||
"dotnet5.4": {
|
||||
"dependencies": {
|
||||
"System.Resources.ResourceManager": "4.0.1-beta-*"
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
using System;
|
||||
using System.Linq;
|
||||
using Microsoft.Extensions.WebEncoders;
|
||||
using System.Text.Encodings.Web;
|
||||
|
||||
namespace Microsoft.AspNet.Http
|
||||
{
|
||||
|
|
@ -66,7 +66,7 @@ namespace Microsoft.AspNet.Http
|
|||
public string ToUriComponent()
|
||||
{
|
||||
// TODO: Measure the cost of this escaping and consider optimizing.
|
||||
return HasValue ? string.Join("/", _value.Split('/').Select(UrlEncoder.Default.UrlEncode)) : string.Empty;
|
||||
return HasValue ? string.Join("/", _value.Split('/').Select(UrlEncoder.Default.Encode)) : string.Empty;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
|
|
|||
|
|
@ -4,8 +4,8 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text;
|
||||
using System.Text.Encodings.Web;
|
||||
using Microsoft.Extensions.Primitives;
|
||||
using Microsoft.Extensions.WebEncoders;
|
||||
|
||||
namespace Microsoft.AspNet.Http
|
||||
{
|
||||
|
|
@ -119,7 +119,16 @@ namespace Microsoft.AspNet.Http
|
|||
/// <returns>The resulting QueryString</returns>
|
||||
public static QueryString Create(string name, string value)
|
||||
{
|
||||
return new QueryString("?" + UrlEncoder.Default.UrlEncode(name) + '=' + UrlEncoder.Default.UrlEncode(value));
|
||||
if (name == null)
|
||||
{
|
||||
throw new ArgumentNullException(nameof(name));
|
||||
}
|
||||
if (value == null)
|
||||
{
|
||||
throw new ArgumentNullException(nameof(value));
|
||||
}
|
||||
|
||||
return new QueryString("?" + UrlEncoder.Default.Encode(name) + '=' + UrlEncoder.Default.Encode(value));
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
|
@ -135,9 +144,9 @@ namespace Microsoft.AspNet.Http
|
|||
{
|
||||
builder.Append(first ? "?" : "&");
|
||||
first = false;
|
||||
builder.Append(UrlEncoder.Default.UrlEncode(pair.Key));
|
||||
builder.Append(UrlEncoder.Default.Encode(pair.Key));
|
||||
builder.Append("=");
|
||||
builder.Append(UrlEncoder.Default.UrlEncode(pair.Value));
|
||||
builder.Append(UrlEncoder.Default.Encode(pair.Value));
|
||||
}
|
||||
|
||||
return new QueryString(builder.ToString());
|
||||
|
|
@ -158,9 +167,9 @@ namespace Microsoft.AspNet.Http
|
|||
{
|
||||
builder.Append(first ? "?" : "&");
|
||||
first = false;
|
||||
builder.Append(UrlEncoder.Default.UrlEncode(pair.Key));
|
||||
builder.Append(UrlEncoder.Default.Encode(pair.Key));
|
||||
builder.Append("=");
|
||||
builder.Append(UrlEncoder.Default.UrlEncode(value));
|
||||
builder.Append(UrlEncoder.Default.Encode(value));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -184,6 +193,15 @@ namespace Microsoft.AspNet.Http
|
|||
|
||||
public QueryString Add(string name, string value)
|
||||
{
|
||||
if (name == null)
|
||||
{
|
||||
throw new ArgumentNullException(nameof(name));
|
||||
}
|
||||
if (value == null)
|
||||
{
|
||||
throw new ArgumentNullException(nameof(value));
|
||||
}
|
||||
|
||||
if (!HasValue || Value.Equals("?", StringComparison.Ordinal))
|
||||
{
|
||||
return Create(name, value);
|
||||
|
|
@ -191,9 +209,9 @@ namespace Microsoft.AspNet.Http
|
|||
|
||||
var builder = new StringBuilder(Value);
|
||||
builder.Append("&");
|
||||
builder.Append(UrlEncoder.Default.UrlEncode(name));
|
||||
builder.Append(UrlEncoder.Default.Encode(name));
|
||||
builder.Append("=");
|
||||
builder.Append(UrlEncoder.Default.UrlEncode(value));
|
||||
builder.Append(UrlEncoder.Default.Encode(value));
|
||||
return new QueryString(builder.ToString());
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -14,13 +14,19 @@
|
|||
"type": "build",
|
||||
"version": "1.0.0-*"
|
||||
},
|
||||
"Microsoft.Extensions.WebEncoders.Core": "1.0.0-*"
|
||||
"System.Text.Encodings.Web": "4.0.0-beta-*"
|
||||
},
|
||||
"frameworks": {
|
||||
"net451": {},
|
||||
"net451": {
|
||||
"frameworkAssemblies": {
|
||||
"System.IO": "",
|
||||
"System.Runtime": ""
|
||||
}
|
||||
},
|
||||
"dotnet5.4": {
|
||||
"dependencies": {
|
||||
"System.Collections": "4.0.11-beta-*",
|
||||
"System.ComponentModel": "4.0.1-beta-*",
|
||||
"System.Diagnostics.Tools": "4.0.1-beta-*",
|
||||
"System.Globalization": "4.0.11-beta-*",
|
||||
"System.Globalization.Extensions": "4.0.1-beta-*",
|
||||
|
|
@ -38,4 +44,4 @@
|
|||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@
|
|||
using System.Collections;
|
||||
using System.Collections.Generic;
|
||||
using System.Text;
|
||||
using Microsoft.Extensions.WebEncoders;
|
||||
using System.Text.Encodings.Web;
|
||||
|
||||
namespace Microsoft.AspNet.Http.Extensions
|
||||
{
|
||||
|
|
@ -45,9 +45,9 @@ namespace Microsoft.AspNet.Http.Extensions
|
|||
var pair = _params[i];
|
||||
builder.Append(first ? "?" : "&");
|
||||
first = false;
|
||||
builder.Append(UrlEncoder.Default.UrlEncode(pair.Key));
|
||||
builder.Append(UrlEncoder.Default.Encode(pair.Key));
|
||||
builder.Append("=");
|
||||
builder.Append(UrlEncoder.Default.UrlEncode(pair.Value));
|
||||
builder.Append(UrlEncoder.Default.Encode(pair.Value));
|
||||
}
|
||||
|
||||
return builder.ToString();
|
||||
|
|
|
|||
|
|
@ -10,7 +10,6 @@
|
|||
},
|
||||
"dependencies": {
|
||||
"Microsoft.AspNet.Http.Abstractions": "1.0.0-*",
|
||||
"Microsoft.Extensions.WebEncoders.Core": "1.0.0-*",
|
||||
"Microsoft.Net.Http.Headers": "1.0.0-*"
|
||||
},
|
||||
"frameworks": {
|
||||
|
|
@ -23,4 +22,4 @@
|
|||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,8 +3,8 @@
|
|||
|
||||
using System;
|
||||
using System.Linq;
|
||||
using System.Text.Encodings.Web;
|
||||
using Microsoft.Extensions.Primitives;
|
||||
using Microsoft.Extensions.WebEncoders;
|
||||
using Microsoft.Net.Http.Headers;
|
||||
|
||||
namespace Microsoft.AspNet.Http.Internal
|
||||
|
|
@ -38,8 +38,8 @@ namespace Microsoft.AspNet.Http.Internal
|
|||
public void Append(string key, string value)
|
||||
{
|
||||
var setCookieHeaderValue = new SetCookieHeaderValue(
|
||||
UrlEncoder.Default.UrlEncode(key),
|
||||
UrlEncoder.Default.UrlEncode(value))
|
||||
UrlEncoder.Default.Encode(key),
|
||||
UrlEncoder.Default.Encode(value))
|
||||
{
|
||||
Path = "/"
|
||||
};
|
||||
|
|
@ -61,8 +61,8 @@ namespace Microsoft.AspNet.Http.Internal
|
|||
}
|
||||
|
||||
var setCookieHeaderValue = new SetCookieHeaderValue(
|
||||
UrlEncoder.Default.UrlEncode(key),
|
||||
UrlEncoder.Default.UrlEncode(value))
|
||||
UrlEncoder.Default.Encode(key),
|
||||
UrlEncoder.Default.Encode(value))
|
||||
{
|
||||
Domain = options.Domain,
|
||||
Path = options.Path,
|
||||
|
|
@ -80,7 +80,7 @@ namespace Microsoft.AspNet.Http.Internal
|
|||
/// <param name="key"></param>
|
||||
public void Delete(string key)
|
||||
{
|
||||
var encodedKeyPlusEquals = UrlEncoder.Default.UrlEncode(key) + "=";
|
||||
var encodedKeyPlusEquals = UrlEncoder.Default.Encode(key) + "=";
|
||||
Func<string, bool> predicate = value => value.StartsWith(encodedKeyPlusEquals, StringComparison.OrdinalIgnoreCase);
|
||||
|
||||
StringValues deleteCookies = encodedKeyPlusEquals + "; expires=Thu, 01-Jan-1970 00:00:00 GMT";
|
||||
|
|
@ -107,7 +107,7 @@ namespace Microsoft.AspNet.Http.Internal
|
|||
throw new ArgumentNullException(nameof(options));
|
||||
}
|
||||
|
||||
var encodedKeyPlusEquals = UrlEncoder.Default.UrlEncode(key) + "=";
|
||||
var encodedKeyPlusEquals = UrlEncoder.Default.Encode(key) + "=";
|
||||
bool domainHasValue = !string.IsNullOrEmpty(options.Domain);
|
||||
bool pathHasValue = !string.IsNullOrEmpty(options.Path);
|
||||
|
||||
|
|
|
|||
|
|
@ -19,7 +19,8 @@
|
|||
"dotnet5.4": {
|
||||
"dependencies": {
|
||||
"System.Diagnostics.Debug": "4.0.11-beta-*",
|
||||
"System.Text.Encoding": "4.0.11-beta-*"
|
||||
"System.Text.Encoding": "4.0.11-beta-*",
|
||||
"System.Threading": "4.0.11-beta-*"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,8 +4,8 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text;
|
||||
using System.Text.Encodings.Web;
|
||||
using Microsoft.Extensions.Primitives;
|
||||
using Microsoft.Extensions.WebEncoders;
|
||||
|
||||
namespace Microsoft.AspNet.WebUtilities
|
||||
{
|
||||
|
|
@ -92,9 +92,9 @@ namespace Microsoft.AspNet.WebUtilities
|
|||
foreach (var parameter in queryString)
|
||||
{
|
||||
sb.Append(hasQuery ? '&' : '?');
|
||||
sb.Append(UrlEncoder.Default.UrlEncode(parameter.Key));
|
||||
sb.Append(UrlEncoder.Default.Encode(parameter.Key));
|
||||
sb.Append('=');
|
||||
sb.Append(UrlEncoder.Default.UrlEncode(parameter.Value));
|
||||
sb.Append(UrlEncoder.Default.Encode(parameter.Value));
|
||||
hasQuery = true;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -10,10 +10,14 @@
|
|||
},
|
||||
"dependencies": {
|
||||
"Microsoft.Extensions.Primitives": "1.0.0-*",
|
||||
"Microsoft.Extensions.WebEncoders.Core": "1.0.0-*"
|
||||
"System.Text.Encodings.Web": "4.0.0-beta-*"
|
||||
},
|
||||
"frameworks": {
|
||||
"net451": {},
|
||||
"net451": {
|
||||
"frameworkAssemblies": {
|
||||
"System.Runtime": ""
|
||||
}
|
||||
},
|
||||
"dotnet5.4": {
|
||||
"dependencies": {
|
||||
"System.Collections": "4.0.11-beta-*",
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ using System;
|
|||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.IO;
|
||||
using System.Text.Encodings.Web;
|
||||
using Microsoft.AspNet.Html.Abstractions;
|
||||
using Microsoft.Extensions.WebEncoders;
|
||||
|
||||
|
|
@ -64,7 +65,7 @@ namespace Microsoft.Extensions.Internal
|
|||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public void WriteTo(TextWriter writer, IHtmlEncoder encoder)
|
||||
public void WriteTo(TextWriter writer, HtmlEncoder encoder)
|
||||
{
|
||||
if (writer == null)
|
||||
{
|
||||
|
|
@ -86,7 +87,7 @@ namespace Microsoft.Extensions.Internal
|
|||
var entryAsString = entry as string;
|
||||
if (entryAsString != null)
|
||||
{
|
||||
encoder.HtmlEncode(entryAsString, writer);
|
||||
encoder.Encode(writer, entryAsString);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
@ -116,7 +117,7 @@ namespace Microsoft.Extensions.Internal
|
|||
_value = value;
|
||||
}
|
||||
|
||||
public void WriteTo(TextWriter writer, IHtmlEncoder encoder)
|
||||
public void WriteTo(TextWriter writer, HtmlEncoder encoder)
|
||||
{
|
||||
writer.Write(_value);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,77 +0,0 @@
|
|||
// Copyright (c) .NET Foundation. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
|
||||
namespace Microsoft.Extensions.WebEncoders
|
||||
{
|
||||
/// <summary>
/// A bit set holding one flag per UTF-16 code unit (0x0000 through 0xFFFF).
/// A set flag marks a character that may be returned unencoded; a cleared flag
/// marks a character that must be returned encoded.
/// </summary>
internal struct AllowedCharsBitmap
{
    // 0x10000 flags, packed 32 to each uint word.
    private const int WordCount = 0x10000 / (8 * sizeof(uint));

    private readonly uint[] _words;

    private AllowedCharsBitmap(uint[] words)
    {
        Debug.Assert(words != null);
        _words = words;
    }

    /// <summary>
    /// Creates a bitmap backed by a freshly allocated, zeroed array (every character forbidden).
    /// Should be called in place of the constructor.
    /// </summary>
    public static AllowedCharsBitmap CreateNew()
    {
        return new AllowedCharsBitmap(new uint[WordCount]);
    }

    /// <summary>
    /// Creates a deep copy of this bitmap; the copy owns its own backing array.
    /// </summary>
    public AllowedCharsBitmap Clone()
    {
        var copiedWords = (uint[])_words.Clone();
        return new AllowedCharsBitmap(copiedWords);
    }

    /// <summary>
    /// Marks a character as allowed (can be returned unencoded).
    /// </summary>
    public void AllowCharacter(char c)
    {
        // The upper 11 bits select the word, the lower 5 bits select the flag inside it.
        _words[c >> 5] |= 1U << (c & 0x1F);
    }

    /// <summary>
    /// Marks a character as forbidden (must be returned encoded).
    /// </summary>
    public void ForbidCharacter(char c)
    {
        _words[c >> 5] &= ~(1U << (c & 0x1F));
    }

    /// <summary>
    /// Determines whether the given character can be returned unencoded.
    /// </summary>
    public bool IsCharacterAllowed(char c)
    {
        uint flag = 1U << (c & 0x1F);
        return (_words[c >> 5] & flag) != 0;
    }

    /// <summary>
    /// Marks all characters as forbidden (must be returned encoded).
    /// </summary>
    public void Clear()
    {
        Array.Clear(_words, 0, _words.Length);
    }

    /// <summary>
    /// Forbids code points which aren't mapped to characters or which are otherwise always
    /// disallowed (includes categories Cc, Cs, Co, Cn, Zs [except U+0020 SPACE], Zl, Zp).
    /// </summary>
    public void ForbidUndefinedCharacters()
    {
        uint[] definedWords = UnicodeHelpers.GetDefinedCharacterBitmap();
        Debug.Assert(definedWords.Length == _words.Length);

        // Intersect word by word: a character stays allowed only if it is also defined.
        int wordIndex = 0;
        while (wordIndex < _words.Length)
        {
            _words[wordIndex] &= definedWords[wordIndex];
            wordIndex++;
        }
    }
}
|
||||
}
|
||||
|
|
@ -1,312 +0,0 @@
|
|||
// Copyright (c) .NET Foundation. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace Microsoft.Extensions.WebEncoders
|
||||
{
|
||||
/// <summary>
/// A filter that lets only an explicitly allowed set of Unicode code points through.
/// Most mutating methods return the same instance so that calls can be chained.
/// </summary>
public sealed class CodePointFilter : ICodePointFilter
{
    private AllowedCharsBitmap _allowedCharsBitmap;

    /// <summary>
    /// Creates an empty filter, which allows no code points through until some are added.
    /// </summary>
    public CodePointFilter()
    {
        _allowedCharsBitmap = AllowedCharsBitmap.CreateNew();
    }

    /// <summary>
    /// Creates a filter whose allow list is a copy of the allow list of <paramref name="other"/>.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="other"/> is null.</exception>
    public CodePointFilter(ICodePointFilter other)
    {
        if (other == null)
        {
            throw new ArgumentNullException(nameof(other));
        }

        var concreteFilter = other as CodePointFilter;
        if (concreteFilter == null)
        {
            // Unknown implementation: start empty and copy its allowed code points one by one.
            _allowedCharsBitmap = AllowedCharsBitmap.CreateNew();
            AllowFilter(other);
            return;
        }

        // Same concrete type: take a clone of its bitmap directly.
        _allowedCharsBitmap = concreteFilter.GetAllowedCharsBitmap();
    }

    /// <summary>
    /// Creates a filter that allows only the characters covered by <paramref name="allowedRanges"/>.
    /// </summary>
    public CodePointFilter(params UnicodeRange[] allowedRanges)
    {
        _allowedCharsBitmap = AllowedCharsBitmap.CreateNew();
        AllowRanges(allowedRanges);
    }

    /// <summary>
    /// Allows the character <paramref name="c"/> through the filter.
    /// </summary>
    /// <returns>The 'this' instance.</returns>
    public CodePointFilter AllowChar(char c)
    {
        _allowedCharsBitmap.AllowCharacter(c);
        return this;
    }

    /// <summary>
    /// Allows every character in <paramref name="chars"/> through the filter.
    /// A null array is treated as empty.
    /// </summary>
    /// <returns>The 'this' instance.</returns>
    public CodePointFilter AllowChars(params char[] chars)
    {
        if (chars == null)
        {
            return this;
        }

        foreach (char allowed in chars)
        {
            _allowedCharsBitmap.AllowCharacter(allowed);
        }
        return this;
    }

    /// <summary>
    /// Allows every character of the string <paramref name="chars"/> through the filter.
    /// </summary>
    /// <returns>The 'this' instance.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="chars"/> is null.</exception>
    public CodePointFilter AllowChars(string chars)
    {
        if (chars == null)
        {
            throw new ArgumentNullException(nameof(chars));
        }

        foreach (char allowed in chars)
        {
            _allowedCharsBitmap.AllowCharacter(allowed);
        }
        return this;
    }

    /// <summary>
    /// Allows every character that <paramref name="filter"/> allows through this filter.
    /// </summary>
    /// <returns>The 'this' instance.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="filter"/> is null.</exception>
    public CodePointFilter AllowFilter(ICodePointFilter filter)
    {
        if (filter == null)
        {
            throw new ArgumentNullException(nameof(filter));
        }

        foreach (int codePoint in filter.GetAllowedCodePoints())
        {
            // Code points that can't be represented as a single BMP character are skipped.
            bool fitsInChar = codePoint >= char.MinValue && codePoint <= char.MaxValue;
            if (fitsInChar)
            {
                _allowedCharsBitmap.AllowCharacter((char)codePoint);
            }
        }
        return this;
    }

    /// <summary>
    /// Allows every character in <paramref name="range"/> through the filter.
    /// </summary>
    /// <returns>The 'this' instance.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="range"/> is null.</exception>
    public CodePointFilter AllowRange(UnicodeRange range)
    {
        if (range == null)
        {
            throw new ArgumentNullException(nameof(range));
        }

        int start = range.FirstCodePoint;
        int count = range.RangeSize;
        for (int offset = 0; offset < count; offset++)
        {
            _allowedCharsBitmap.AllowCharacter((char)(start + offset));
        }
        return this;
    }

    /// <summary>
    /// Allows every character in each of <paramref name="ranges"/> through the filter.
    /// A null array is treated as empty.
    /// </summary>
    /// <returns>The 'this' instance.</returns>
    public CodePointFilter AllowRanges(params UnicodeRange[] ranges)
    {
        if (ranges == null)
        {
            return this;
        }

        foreach (UnicodeRange range in ranges)
        {
            AllowRange(range);
        }
        return this;
    }

    /// <summary>
    /// Resets this filter so that no character is allowed.
    /// </summary>
    /// <returns>The 'this' instance.</returns>
    public CodePointFilter Clear()
    {
        _allowedCharsBitmap.Clear();
        return this;
    }

    /// <summary>
    /// Stops the character <paramref name="c"/> from passing through the filter.
    /// </summary>
    /// <returns>The 'this' instance.</returns>
    public CodePointFilter ForbidChar(char c)
    {
        _allowedCharsBitmap.ForbidCharacter(c);
        return this;
    }

    /// <summary>
    /// Stops every character in <paramref name="chars"/> from passing through the filter.
    /// A null array is treated as empty.
    /// </summary>
    /// <returns>The 'this' instance.</returns>
    public CodePointFilter ForbidChars(params char[] chars)
    {
        if (chars == null)
        {
            return this;
        }

        foreach (char forbidden in chars)
        {
            _allowedCharsBitmap.ForbidCharacter(forbidden);
        }
        return this;
    }

    /// <summary>
    /// Stops every character of the string <paramref name="chars"/> from passing through the filter.
    /// </summary>
    /// <returns>The 'this' instance.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="chars"/> is null.</exception>
    public CodePointFilter ForbidChars(string chars)
    {
        if (chars == null)
        {
            throw new ArgumentNullException(nameof(chars));
        }

        foreach (char forbidden in chars)
        {
            _allowedCharsBitmap.ForbidCharacter(forbidden);
        }
        return this;
    }

    /// <summary>
    /// Stops every character in <paramref name="range"/> from passing through the filter.
    /// </summary>
    /// <returns>The 'this' instance.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="range"/> is null.</exception>
    public CodePointFilter ForbidRange(UnicodeRange range)
    {
        if (range == null)
        {
            throw new ArgumentNullException(nameof(range));
        }

        int start = range.FirstCodePoint;
        int count = range.RangeSize;
        for (int offset = 0; offset < count; offset++)
        {
            _allowedCharsBitmap.ForbidCharacter((char)(start + offset));
        }
        return this;
    }

    /// <summary>
    /// Stops every character in each of <paramref name="ranges"/> from passing through the filter.
    /// A null array is treated as empty.
    /// </summary>
    /// <returns>The 'this' instance.</returns>
    public CodePointFilter ForbidRanges(params UnicodeRange[] ranges)
    {
        if (ranges == null)
        {
            return this;
        }

        foreach (UnicodeRange range in ranges)
        {
            ForbidRange(range);
        }
        return this;
    }

    /// <summary>
    /// Returns the bitmap of allowed characters for this filter.
    /// A clone is handed out so that callers cannot modify the filter's own bitmap by accident.
    /// </summary>
    internal AllowedCharsBitmap GetAllowedCharsBitmap()
    {
        return _allowedCharsBitmap.Clone();
    }

    /// <summary>
    /// Enumerates, in ascending order, every code point in 0x0000..0xFFFF that this filter allows.
    /// </summary>
    public IEnumerable<int> GetAllowedCodePoints()
    {
        for (int codePoint = char.MinValue; codePoint <= char.MaxValue; codePoint++)
        {
            if (!_allowedCharsBitmap.IsCharacterAllowed((char)codePoint))
            {
                continue;
            }

            yield return codePoint;
        }
    }

    /// <summary>
    /// Reports whether the character <paramref name="c"/> is allowed through the filter.
    /// </summary>
    public bool IsCharacterAllowed(char c)
    {
        return _allowedCharsBitmap.IsCharacterAllowed(c);
    }

    /// <summary>
    /// Returns <paramref name="filter"/> itself when it already is a <see cref="CodePointFilter"/>;
    /// otherwise builds a new <see cref="CodePointFilter"/> from it.
    /// </summary>
    internal static CodePointFilter Wrap(ICodePointFilter filter)
    {
        var existing = filter as CodePointFilter;
        return existing != null ? existing : new CodePointFilter(filter);
    }
}
|
||||
}
|
||||
|
|
@ -1,37 +0,0 @@
|
|||
// Copyright (c) .NET Foundation. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System;
|
||||
|
||||
namespace Microsoft.Extensions.WebEncoders
|
||||
{
|
||||
internal static class EncoderCommon
{
    // 32KB (16k chars) is treated as a soft upper boundary for the initial size of any
    // StringBuilder allocated for encoder output, to avoid placing objects on the LOH
    // whenever that can be avoided.
    private const int SoftCapacityLimit = 16 * 1024;

    /// <summary>
    /// Picks the initial capacity of the StringBuilder used to build encoder output.
    /// </summary>
    /// <param name="numCharsToEncode">Number of input characters that will be encoded.</param>
    /// <param name="worstCaseOutputCharsPerInputChar">
    /// Largest number of output characters a single input character can expand to.
    /// </param>
    /// <returns>
    /// <paramref name="numCharsToEncode"/> itself when it already reaches the soft limit;
    /// otherwise the worst-case output length, capped at the soft limit.
    /// </returns>
    public static int GetCapacityOfOutputStringBuilder(int numCharsToEncode, int worstCaseOutputCharsPerInputChar)
    {
        // The output holds at least as many characters as the input, so once the input alone
        // reaches the soft limit, preallocate exactly that much and hope for the best case.
        // The StringBuilder allocates additional blocks efficiently if more room is needed.
        if (numCharsToEncode >= SoftCapacityLimit)
        {
            return numCharsToEncode;
        }

        // Otherwise reserve the worst case, but never more than the soft limit.
        // The product is computed as a long so that it cannot overflow an int.
        long worstCaseLength = (long)numCharsToEncode * worstCaseOutputCharsPerInputChar;
        return (int)Math.Min(SoftCapacityLimit, worstCaseLength);
    }
}
|
||||
}
|
||||
|
|
@ -1,70 +0,0 @@
|
|||
// Copyright (c) .NET Foundation. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System;
|
||||
using System.IO;
|
||||
|
||||
namespace Microsoft.Extensions.WebEncoders
|
||||
{
|
||||
/// <summary>
/// Convenience extension methods for the encoder interfaces that encode a whole string
/// into a <see cref="TextWriter"/>.
/// </summary>
public static class EncoderExtensions
{
    /// <summary>
    /// HTML-encodes <paramref name="value"/> and writes the result to <paramref name="output"/>.
    /// A null or empty <paramref name="value"/> writes nothing.
    /// </summary>
    /// <remarks>
    /// The encoded value is also safe for inclusion inside an HTML attribute
    /// as long as the attribute value is surrounded by single or double quotes.
    /// </remarks>
    /// <exception cref="ArgumentNullException"><paramref name="htmlEncoder"/> is null.</exception>
    public static void HtmlEncode(this IHtmlEncoder htmlEncoder, string value, TextWriter output)
    {
        if (htmlEncoder == null)
        {
            throw new ArgumentNullException(nameof(htmlEncoder));
        }

        if (string.IsNullOrEmpty(value))
        {
            return;
        }

        // Delegate to the substring overload, covering the entire string.
        htmlEncoder.HtmlEncode(value, 0, value.Length, output);
    }

    /// <summary>
    /// JavaScript-escapes <paramref name="value"/> and writes the result to <paramref name="output"/>.
    /// A null or empty <paramref name="value"/> writes nothing.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="javaScriptStringEncoder"/> is null.</exception>
    public static void JavaScriptStringEncode(this IJavaScriptStringEncoder javaScriptStringEncoder, string value, TextWriter output)
    {
        if (javaScriptStringEncoder == null)
        {
            throw new ArgumentNullException(nameof(javaScriptStringEncoder));
        }

        if (string.IsNullOrEmpty(value))
        {
            return;
        }

        // Delegate to the substring overload, covering the entire string.
        javaScriptStringEncoder.JavaScriptStringEncode(value, 0, value.Length, output);
    }

    /// <summary>
    /// URL-encodes <paramref name="value"/> and writes the result to <paramref name="output"/>.
    /// A null or empty <paramref name="value"/> writes nothing.
    /// </summary>
    /// <remarks>
    /// The encoded value is safe for use in the segment, query, or
    /// fragment portion of a URI.
    /// </remarks>
    /// <exception cref="ArgumentNullException"><paramref name="urlEncoder"/> is null.</exception>
    public static void UrlEncode(this IUrlEncoder urlEncoder, string value, TextWriter output)
    {
        if (urlEncoder == null)
        {
            throw new ArgumentNullException(nameof(urlEncoder));
        }

        if (string.IsNullOrEmpty(value))
        {
            return;
        }

        // Delegate to the substring overload, covering the entire string.
        urlEncoder.UrlEncode(value, 0, value.Length, output);
    }
}
|
||||
}
|
||||
|
|
@ -1,48 +0,0 @@
|
|||
// Copyright (c) .NET Foundation. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
using System.Runtime.CompilerServices;
|
||||
|
||||
namespace Microsoft.Extensions.WebEncoders
|
||||
{
|
||||
/// <summary>
|
||||
/// Contains helpers for dealing with byte-hex char conversions.
|
||||
/// </summary>
|
||||
internal static class HexUtil
{
    /// <summary>
    /// Maps a nibble value (0 - 15) to its uppercase hexadecimal digit ('0' - '9', 'A' - 'F').
    /// </summary>
    /// <remarks>
    /// The range is only checked by a debug assertion.
    /// </remarks>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal static char IntToChar(uint i)
    {
        Debug.Assert(i < 16);

        if (i < 10)
        {
            return (char)('0' + i);
        }

        return (char)('A' + (i - 10));
    }

    /// <summary>
    /// Converts a hexadecimal digit (either letter case) to its numeric value.
    /// </summary>
    /// <returns>0 - 15 if the character is valid, -1 if the character is invalid.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal static int ParseHexCharacter(char c)
    {
        if (c >= '0' && c <= '9')
        {
            return c - '0';
        }

        if (c >= 'A' && c <= 'F')
        {
            return c - 'A' + 10;
        }

        if (c >= 'a' && c <= 'f')
        {
            return c - 'a' + 10;
        }

        return -1;
    }

    /// <summary>
    /// Splits a byte into its two uppercase hexadecimal digits, high nibble first.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal static void WriteHexEncodedByte(byte b, out char firstHexChar, out char secondHexChar)
    {
        uint octet = b;
        firstHexChar = IntToChar(octet >> 4);
        secondHexChar = IntToChar(octet & 0xFU);
    }
}
|
||||
}
|
||||
|
|
@ -1,208 +0,0 @@
|
|||
// Copyright (c) .NET Foundation. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
using System.IO;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Threading;
|
||||
|
||||
namespace Microsoft.Extensions.WebEncoders
|
||||
{
|
||||
/// <summary>
|
||||
/// A class which can perform HTML encoding given an allow list of characters which
|
||||
/// can be represented unencoded.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// Instances of this type will always encode a certain set of characters (such as &lt;
|
||||
/// and &gt;), even if the filter provided in the constructor allows such characters.
|
||||
/// Once constructed, instances of this class are thread-safe for multiple callers.
|
||||
/// </remarks>
|
||||
public unsafe sealed class HtmlEncoder : IHtmlEncoder
{
    // The application-wide default encoder. Created on first read of Default unless a
    // caller has already assigned one through the setter.
    private static HtmlEncoder _defaultEncoder;

    // The inner encoder that performs the actual encoding for this instance.
    private readonly HtmlUnicodeEncoder _innerUnicodeEncoder;

    /// <summary>
    /// Instantiates an encoder using <see cref="UnicodeRanges.BasicLatin"/> as its allow list.
    /// Any character not in the <see cref="UnicodeRanges.BasicLatin"/> range will be escaped.
    /// </summary>
    public HtmlEncoder()
        : this(HtmlUnicodeEncoder.BasicLatin)
    {
    }

    /// <summary>
    /// Instantiates an encoder specifying which Unicode character ranges are allowed to
    /// pass through the encoder unescaped. Any character not in the set of ranges specified
    /// by <paramref name="allowedRanges"/> will be escaped.
    /// </summary>
    public HtmlEncoder(params UnicodeRange[] allowedRanges)
        : this(new HtmlUnicodeEncoder(new CodePointFilter(allowedRanges)))
    {
    }

    /// <summary>
    /// Instantiates an encoder using a custom code point filter. Any character not in the
    /// set returned by <paramref name="filter"/>'s <see cref="ICodePointFilter.GetAllowedCodePoints"/>
    /// method will be escaped.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="filter"/> is null.</exception>
    public HtmlEncoder(ICodePointFilter filter)
        : this(new HtmlUnicodeEncoder(CodePointFilter.Wrap(RequireFilter(filter))))
    {
        // The null check lives in RequireFilter because a constructor initializer runs
        // before the constructor body; a check placed here would only execute after the
        // filter had already been wrapped and used to build the inner encoder.
    }

    private HtmlEncoder(HtmlUnicodeEncoder innerEncoder)
    {
        Debug.Assert(innerEncoder != null);
        _innerUnicodeEncoder = innerEncoder;
    }

    // Argument validation usable from within a constructor initializer.
    private static ICodePointFilter RequireFilter(ICodePointFilter filter)
    {
        if (filter == null)
        {
            throw new ArgumentNullException(nameof(filter));
        }

        return filter;
    }

    /// <summary>
    /// A default instance of <see cref="HtmlEncoder"/>.
    /// </summary>
    /// <remarks>
    /// This normally corresponds to <see cref="UnicodeRanges.BasicLatin"/>. However, this property is
    /// settable so that a developer can change the default implementation application-wide.
    /// </remarks>
    /// <exception cref="ArgumentNullException">The property is set to null.</exception>
    public static HtmlEncoder Default
    {
        get
        {
            return Volatile.Read(ref _defaultEncoder) ?? CreateDefaultEncoderSlow();
        }
        set
        {
            if (value == null)
            {
                throw new ArgumentNullException(nameof(value));
            }

            Volatile.Write(ref _defaultEncoder, value);
        }
    }

    [MethodImpl(MethodImplOptions.NoInlining)] // the JITter can attempt to inline the caller itself without worrying about us
    private static HtmlEncoder CreateDefaultEncoderSlow()
    {
        // Publish the new instance only if the field is still null. CompareExchange returns
        // the previous field value, so a non-null result is an encoder that was stored in
        // the meantime and takes precedence over the one created here.
        var onDemandEncoder = new HtmlEncoder();
        return Interlocked.CompareExchange(ref _defaultEncoder, onDemandEncoder, null) ?? onDemandEncoder;
    }

    /// <summary>
    /// HTML-encodes <paramref name="charCount"/> characters of <paramref name="value"/>, starting
    /// at <paramref name="startIndex"/>, and writes the result to <paramref name="output"/>.
    /// </summary>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="value"/> or <paramref name="output"/> is null.
    /// </exception>
    public void HtmlEncode(char[] value, int startIndex, int charCount, TextWriter output)
    {
        if (value == null)
        {
            throw new ArgumentNullException(nameof(value));
        }

        if (output == null)
        {
            throw new ArgumentNullException(nameof(output));
        }

        _innerUnicodeEncoder.Encode(value, startIndex, charCount, output);
    }

    /// <summary>
    /// HTML-encodes <paramref name="value"/> and returns the result. Null checking is left
    /// to the inner encoder.
    /// </summary>
    public string HtmlEncode(string value)
    {
        return _innerUnicodeEncoder.Encode(value);
    }

    /// <summary>
    /// HTML-encodes <paramref name="charCount"/> characters of <paramref name="value"/>, starting
    /// at <paramref name="startIndex"/>, and writes the result to <paramref name="output"/>.
    /// </summary>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="value"/> or <paramref name="output"/> is null.
    /// </exception>
    public void HtmlEncode(string value, int startIndex, int charCount, TextWriter output)
    {
        if (value == null)
        {
            throw new ArgumentNullException(nameof(value));
        }

        if (output == null)
        {
            throw new ArgumentNullException(nameof(output));
        }

        _innerUnicodeEncoder.Encode(value, startIndex, charCount, output);
    }

    private sealed class HtmlUnicodeEncoder : UnicodeEncoderBase
    {
        // A singleton instance of the basic latin encoder.
        private static HtmlUnicodeEncoder _basicLatinSingleton;

        // The worst case encoding is 8 output chars per input char: [input] U+FFFF -> [output] "&#xFFFF;"
        // We don't need to worry about astral code points since they consume *two* input chars to
        // generate at most 10 output chars ("&#x10FFFF;"), which equates to 5 output chars per input char.
        private const int MaxOutputCharsPerInputChar = 8;

        internal HtmlUnicodeEncoder(CodePointFilter filter)
            : base(filter, MaxOutputCharsPerInputChar)
        {
        }

        internal static HtmlUnicodeEncoder BasicLatin
        {
            get
            {
                // No lock is taken: two callers that both observe null will each build an
                // instance and the later write wins.
                HtmlUnicodeEncoder encoder = Volatile.Read(ref _basicLatinSingleton);
                if (encoder == null)
                {
                    encoder = new HtmlUnicodeEncoder(new CodePointFilter(UnicodeRanges.BasicLatin));
                    Volatile.Write(ref _basicLatinSingleton, encoder);
                }

                return encoder;
            }
        }

        // Writes a scalar value as an HTML-encoded entity. The four characters below are
        // written as named entities; everything else is written as a hexadecimal numeric entity.
        protected override void WriteEncodedScalar(ref Writer writer, uint value)
        {
            if (value == (uint)'\"') { writer.Write("&quot;"); }
            else if (value == (uint)'&') { writer.Write("&amp;"); }
            else if (value == (uint)'<') { writer.Write("&lt;"); }
            else if (value == (uint)'>') { writer.Write("&gt;"); }
            else { WriteEncodedScalarAsNumericEntity(ref writer, value); }
        }

        // Writes a scalar value as an HTML-encoded numeric entity of the form "&#xHHHH;".
        private static void WriteEncodedScalarAsNumericEntity(ref Writer writer, uint value)
        {
            // We're building the characters up in reverse: least significant nibble first.
            char* chars = stackalloc char[8 /* "FFFFFFFF" */];
            int numCharsWritten = 0;
            do
            {
                Debug.Assert(numCharsWritten < 8, "Couldn't have written 8 characters out by this point.");

                // Pop off the last nibble
                chars[numCharsWritten++] = HexUtil.IntToChar(value & 0xFU);
                value >>= 4;
            } while (value != 0);

            // Finally, write out the HTML-encoded scalar value, emitting the buffered digits
            // back to front so that the most significant digit comes first.
            writer.Write('&');
            writer.Write('#');
            writer.Write('x');
            Debug.Assert(numCharsWritten > 0, "At least one character should've been written.");
            do
            {
                writer.Write(chars[--numCharsWritten]);
            } while (numCharsWritten != 0);

            writer.Write(';');
        }
    }
}
|
||||
}
|
||||
|
|
@ -1,19 +0,0 @@
|
|||
// Copyright (c) .NET Foundation. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace Microsoft.Extensions.WebEncoders
|
||||
{
|
||||
/// <summary>
|
||||
/// Represents a filter which allows only certain Unicode code points through.
|
||||
/// </summary>
|
||||
public interface ICodePointFilter
{
    /// <summary>
    /// Enumerates every code point that this filter allows.
    /// </summary>
    /// <returns>The allowed code points, expressed as integer values.</returns>
    IEnumerable<int> GetAllowedCodePoints();
}
|
||||
}
|
||||
|
|
@ -1,45 +0,0 @@
|
|||
// Copyright (c) .NET Foundation. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System.IO;
|
||||
|
||||
namespace Microsoft.Extensions.WebEncoders
|
||||
{
|
||||
/// <summary>
|
||||
/// Provides services for HTML-encoding input.
|
||||
/// </summary>
|
||||
public interface IHtmlEncoder
{
    /// <summary>
    /// HTML-encodes a range of a character array and writes the encoded text to
    /// <paramref name="output"/>.
    /// </summary>
    /// <param name="value">The array that contains the characters to encode.</param>
    /// <param name="startIndex">The index in <paramref name="value"/> of the first character to encode.</param>
    /// <param name="charCount">The number of characters to encode.</param>
    /// <param name="output">The writer that receives the encoded text.</param>
    /// <remarks>
    /// The encoded text may also be placed inside an HTML attribute value, provided that
    /// the attribute value is surrounded by single or double quotes.
    /// </remarks>
    void HtmlEncode(char[] value, int startIndex, int charCount, TextWriter output);

    /// <summary>
    /// HTML-encodes an entire string and returns the encoded text.
    /// </summary>
    /// <param name="value">The string to encode.</param>
    /// <returns>
    /// The HTML-encoded value, or null if the input string was null.
    /// </returns>
    /// <remarks>
    /// The returned text may also be placed inside an HTML attribute value, provided that
    /// the attribute value is surrounded by single or double quotes.
    /// </remarks>
    string HtmlEncode(string value);

    /// <summary>
    /// HTML-encodes a range of a string and writes the encoded text to
    /// <paramref name="output"/>.
    /// </summary>
    /// <param name="value">The string that contains the characters to encode.</param>
    /// <param name="startIndex">The index in <paramref name="value"/> of the first character to encode.</param>
    /// <param name="charCount">The number of characters to encode.</param>
    /// <param name="output">The writer that receives the encoded text.</param>
    /// <remarks>
    /// The encoded text may also be placed inside an HTML attribute value, provided that
    /// the attribute value is surrounded by single or double quotes.
    /// </remarks>
    void HtmlEncode(string value, int startIndex, int charCount, TextWriter output);
}
|
||||
}
|
||||
|
|
@ -1,40 +0,0 @@
|
|||
// Copyright (c) .NET Foundation. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System.IO;
|
||||
|
||||
namespace Microsoft.Extensions.WebEncoders
|
||||
{
|
||||
/// <summary>
|
||||
/// Provides services for JavaScript-escaping strings.
|
||||
/// </summary>
|
||||
public interface IJavaScriptStringEncoder
{
    /// <summary>
    /// JavaScript-escapes a range of a character array and writes the escaped text to
    /// <paramref name="output"/>.
    /// </summary>
    /// <param name="value">The array that contains the characters to escape.</param>
    /// <param name="startIndex">The index in <paramref name="value"/> of the first character to escape.</param>
    /// <param name="charCount">The number of characters to escape.</param>
    /// <param name="output">The writer that receives the escaped text.</param>
    /// <remarks>
    /// The escaped text is suitable for inclusion inside a quoted JSON string.
    /// </remarks>
    void JavaScriptStringEncode(char[] value, int startIndex, int charCount, TextWriter output);

    /// <summary>
    /// JavaScript-escapes an entire string and returns the escaped text.
    /// </summary>
    /// <param name="value">The string to escape.</param>
    /// <returns>
    /// The JavaScript-escaped value, or null if the input string was null.
    /// The escaped text is suitable for inclusion inside a quoted JSON string.
    /// </returns>
    string JavaScriptStringEncode(string value);

    /// <summary>
    /// JavaScript-escapes a range of a string and writes the escaped text to
    /// <paramref name="output"/>.
    /// </summary>
    /// <param name="value">The string that contains the characters to escape.</param>
    /// <param name="startIndex">The index in <paramref name="value"/> of the first character to escape.</param>
    /// <param name="charCount">The number of characters to escape.</param>
    /// <param name="output">The writer that receives the escaped text.</param>
    /// <remarks>
    /// The escaped text is suitable for inclusion inside a quoted JSON string.
    /// </remarks>
    void JavaScriptStringEncode(string value, int startIndex, int charCount, TextWriter output);
}
|
||||
}
|
||||
|
|
@ -1,44 +0,0 @@
|
|||
// Copyright (c) .NET Foundation. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System.IO;
|
||||
|
||||
namespace Microsoft.Extensions.WebEncoders
|
||||
{
|
||||
/// <summary>
|
||||
/// Provides services for URL-escaping strings.
|
||||
/// </summary>
|
||||
public interface IUrlEncoder
{
    /// <summary>
    /// URL-escapes a range of a character array and writes the escaped text to
    /// <paramref name="output"/>.
    /// </summary>
    /// <param name="value">The array that contains the characters to escape.</param>
    /// <param name="startIndex">The index in <paramref name="value"/> of the first character to escape.</param>
    /// <param name="charCount">The number of characters to escape.</param>
    /// <param name="output">The writer that receives the escaped text.</param>
    /// <remarks>
    /// The escaped text is suitable for the segment, query, or fragment portion of a URI.
    /// </remarks>
    void UrlEncode(char[] value, int startIndex, int charCount, TextWriter output);

    /// <summary>
    /// URL-escapes an entire string and returns the escaped text.
    /// </summary>
    /// <param name="value">The string to escape.</param>
    /// <returns>
    /// The URL-escaped value, or null if the input string was null.
    /// </returns>
    /// <remarks>
    /// The returned text is suitable for the segment, query, or fragment portion of a URI.
    /// </remarks>
    string UrlEncode(string value);

    /// <summary>
    /// URL-escapes a range of a string and writes the escaped text to
    /// <paramref name="output"/>.
    /// </summary>
    /// <param name="value">The string that contains the characters to escape.</param>
    /// <param name="startIndex">The index in <paramref name="value"/> of the first character to escape.</param>
    /// <param name="charCount">The number of characters to escape.</param>
    /// <param name="output">The writer that receives the escaped text.</param>
    /// <remarks>
    /// The escaped text is suitable for the segment, query, or fragment portion of a URI.
    /// </remarks>
    void UrlEncode(string value, int startIndex, int charCount, TextWriter output);
}
|
||||
}
|
||||
|
|
@ -1,231 +0,0 @@
|
|||
// Copyright (c) .NET Foundation. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
using System.IO;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Threading;
|
||||
|
||||
namespace Microsoft.Extensions.WebEncoders
|
||||
{
|
||||
/// <summary>
|
||||
/// A class which can perform JavaScript string escaping given an allow list of characters which
|
||||
/// can be represented unescaped.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// Instances of this type will always encode a certain set of characters (such as '
|
||||
/// and "), even if the filter provided in the constructor allows such characters.
|
||||
/// Once constructed, instances of this class are thread-safe for multiple callers.
|
||||
/// </remarks>
|
||||
public sealed class JavaScriptStringEncoder : IJavaScriptStringEncoder
{
    // The application-wide default encoder. Created on first read of Default unless a
    // caller has already assigned one through the setter.
    private static JavaScriptStringEncoder _defaultEncoder;

    // The inner encoder that performs the actual encoding for this instance.
    private readonly JavaScriptStringUnicodeEncoder _innerUnicodeEncoder;

    /// <summary>
    /// Instantiates an encoder using <see cref="UnicodeRanges.BasicLatin"/> as its allow list.
    /// Any character not in the <see cref="UnicodeRanges.BasicLatin"/> range will be escaped.
    /// </summary>
    public JavaScriptStringEncoder()
        : this(JavaScriptStringUnicodeEncoder.BasicLatin)
    {
    }

    /// <summary>
    /// Instantiates an encoder specifying which Unicode character ranges are allowed to
    /// pass through the encoder unescaped. Any character not in the set of ranges specified
    /// by <paramref name="allowedRanges"/> will be escaped.
    /// </summary>
    public JavaScriptStringEncoder(params UnicodeRange[] allowedRanges)
        : this(new JavaScriptStringUnicodeEncoder(new CodePointFilter(allowedRanges)))
    {
    }

    /// <summary>
    /// Instantiates an encoder using a custom code point filter. Any character not in the
    /// set returned by <paramref name="filter"/>'s <see cref="ICodePointFilter.GetAllowedCodePoints"/>
    /// method will be escaped.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="filter"/> is null.</exception>
    public JavaScriptStringEncoder(ICodePointFilter filter)
        : this(new JavaScriptStringUnicodeEncoder(CodePointFilter.Wrap(RequireFilter(filter))))
    {
        // The null check lives in RequireFilter because a constructor initializer runs
        // before the constructor body; a check placed here would only execute after the
        // filter had already been wrapped and used to build the inner encoder.
    }

    private JavaScriptStringEncoder(JavaScriptStringUnicodeEncoder innerEncoder)
    {
        Debug.Assert(innerEncoder != null);
        _innerUnicodeEncoder = innerEncoder;
    }

    // Argument validation usable from within a constructor initializer.
    private static ICodePointFilter RequireFilter(ICodePointFilter filter)
    {
        if (filter == null)
        {
            throw new ArgumentNullException(nameof(filter));
        }

        return filter;
    }

    /// <summary>
    /// A default instance of <see cref="JavaScriptStringEncoder"/>.
    /// </summary>
    /// <remarks>
    /// This normally corresponds to <see cref="UnicodeRanges.BasicLatin"/>. However, this property is
    /// settable so that a developer can change the default implementation application-wide.
    /// </remarks>
    /// <exception cref="ArgumentNullException">The property is set to null.</exception>
    public static JavaScriptStringEncoder Default
    {
        get
        {
            return Volatile.Read(ref _defaultEncoder) ?? CreateDefaultEncoderSlow();
        }
        set
        {
            if (value == null)
            {
                throw new ArgumentNullException(nameof(value));
            }

            Volatile.Write(ref _defaultEncoder, value);
        }
    }

    [MethodImpl(MethodImplOptions.NoInlining)] // the JITter can attempt to inline the caller itself without worrying about us
    private static JavaScriptStringEncoder CreateDefaultEncoderSlow()
    {
        // Publish the new instance only if the field is still null. CompareExchange returns
        // the previous field value, so a non-null result is an encoder that was stored in
        // the meantime and takes precedence over the one created here.
        var onDemandEncoder = new JavaScriptStringEncoder();
        return Interlocked.CompareExchange(ref _defaultEncoder, onDemandEncoder, null) ?? onDemandEncoder;
    }

    /// <summary>
    /// JavaScript-escapes <paramref name="charCount"/> characters of <paramref name="value"/>,
    /// starting at <paramref name="startIndex"/>, and writes the result to <paramref name="output"/>.
    /// </summary>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="value"/> or <paramref name="output"/> is null.
    /// </exception>
    public void JavaScriptStringEncode(char[] value, int startIndex, int charCount, TextWriter output)
    {
        if (value == null)
        {
            throw new ArgumentNullException(nameof(value));
        }

        if (output == null)
        {
            throw new ArgumentNullException(nameof(output));
        }

        _innerUnicodeEncoder.Encode(value, startIndex, charCount, output);
    }

    /// <summary>
    /// JavaScript-escapes <paramref name="value"/> and returns the result. Null checking is
    /// left to the inner encoder.
    /// </summary>
    public string JavaScriptStringEncode(string value)
    {
        return _innerUnicodeEncoder.Encode(value);
    }

    /// <summary>
    /// JavaScript-escapes <paramref name="charCount"/> characters of <paramref name="value"/>,
    /// starting at <paramref name="startIndex"/>, and writes the result to <paramref name="output"/>.
    /// </summary>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="value"/> or <paramref name="output"/> is null.
    /// </exception>
    public void JavaScriptStringEncode(string value, int startIndex, int charCount, TextWriter output)
    {
        if (value == null)
        {
            throw new ArgumentNullException(nameof(value));
        }

        if (output == null)
        {
            throw new ArgumentNullException(nameof(output));
        }

        _innerUnicodeEncoder.Encode(value, startIndex, charCount, output);
    }

    private sealed class JavaScriptStringUnicodeEncoder : UnicodeEncoderBase
    {
        // A singleton instance of the basic latin encoder.
        private static JavaScriptStringUnicodeEncoder _basicLatinSingleton;

        // The worst case encoding is 6 output chars per input char: [input] U+FFFF -> [output] "\uFFFF"
        // We don't need to worry about astral code points since they're represented as encoded
        // surrogate pairs in the output.
        private const int MaxOutputCharsPerInputChar = 6;

        internal JavaScriptStringUnicodeEncoder(CodePointFilter filter)
            : base(filter, MaxOutputCharsPerInputChar)
        {
            // The only interesting characters above and beyond what the base encoder
            // already covers are the solidus and reverse solidus.
            ForbidCharacter('\\');
            ForbidCharacter('/');
        }

        internal static JavaScriptStringUnicodeEncoder BasicLatin
        {
            get
            {
                // No lock is taken: two callers that both observe null will each build an
                // instance and the later write wins.
                JavaScriptStringUnicodeEncoder encoder = Volatile.Read(ref _basicLatinSingleton);
                if (encoder == null)
                {
                    encoder = new JavaScriptStringUnicodeEncoder(new CodePointFilter(UnicodeRanges.BasicLatin));
                    Volatile.Write(ref _basicLatinSingleton, encoder);
                }

                return encoder;
            }
        }

        // Writes a scalar value as a JavaScript-escaped character (or sequence of characters).
        // See ECMA-262, Sec. 7.8.4, and ECMA-404, Sec. 9
        // http://www.ecma-international.org/ecma-262/5.1/#sec-7.8.4
        // http://www.ecma-international.org/publications/files/ECMA-ST/ECMA-404.pdf
        protected override void WriteEncodedScalar(ref Writer writer, uint value)
        {
            // ECMA-262 allows encoding U+000B as "\v", but ECMA-404 does not.
            // Both ECMA-262 and ECMA-404 allow encoding U+002F SOLIDUS as "\/".
            // (In ECMA-262 this character is a NonEscape character.)
            // HTML-specific characters (including apostrophe and quotes) will
            // be written out as numeric entities for defense-in-depth.
            // See UnicodeEncoderBase ctor comments for more info.

            if (value == (uint)'\b') { writer.Write(@"\b"); }
            else if (value == (uint)'\t') { writer.Write(@"\t"); }
            else if (value == (uint)'\n') { writer.Write(@"\n"); }
            else if (value == (uint)'\f') { writer.Write(@"\f"); }
            else if (value == (uint)'\r') { writer.Write(@"\r"); }
            else if (value == (uint)'/') { writer.Write(@"\/"); }
            else if (value == (uint)'\\') { writer.Write(@"\\"); }
            else { WriteEncodedScalarAsNumericEntity(ref writer, value); }
        }

        // Writes a scalar value as a JavaScript-escaped character (or sequence of characters).
        // Supplementary code points are written as two "\uXXXX" escapes, one per surrogate.
        private static void WriteEncodedScalarAsNumericEntity(ref Writer writer, uint value)
        {
            if (UnicodeHelpers.IsSupplementaryCodePoint((int)value))
            {
                // Convert this back to UTF-16 and write out both characters.
                char leadingSurrogate, trailingSurrogate;
                UnicodeHelpers.GetUtf16SurrogatePairFromAstralScalarValue((int)value, out leadingSurrogate, out trailingSurrogate);
                WriteEncodedSingleCharacter(ref writer, leadingSurrogate);
                WriteEncodedSingleCharacter(ref writer, trailingSurrogate);
            }
            else
            {
                // This is only a single character.
                WriteEncodedSingleCharacter(ref writer, value);
            }
        }

        // Writes an encoded scalar value (in the BMP) as a JavaScript-escaped character.
        private static void WriteEncodedSingleCharacter(ref Writer writer, uint value)
        {
            Debug.Assert(!UnicodeHelpers.IsSupplementaryCodePoint((int)value), "The incoming value should've been in the BMP.");

            // Encode this as 6 chars "\uFFFF", most significant nibble first.
            writer.Write('\\');
            writer.Write('u');
            writer.Write(HexUtil.IntToChar(value >> 12));
            writer.Write(HexUtil.IntToChar((value >> 8) & 0xFU));
            writer.Write(HexUtil.IntToChar((value >> 4) & 0xFU));
            writer.Write(HexUtil.IntToChar(value & 0xFU));
        }
    }
}
|
||||
}
|
||||
|
|
@ -1,17 +0,0 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<PropertyGroup>
|
||||
<VisualStudioVersion Condition="'$(VisualStudioVersion)' == ''">14.0</VisualStudioVersion>
|
||||
<VSToolsPath Condition="'$(VSToolsPath)' == ''">$(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion)</VSToolsPath>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VSToolsPath)\DNX\Microsoft.DNX.Props" Condition="'$(VSToolsPath)' != ''" />
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>be9112cb-d87d-4080-9cc3-24492d49cbe6</ProjectGuid>
|
||||
<BaseIntermediateOutputPath Condition="'$(BaseIntermediateOutputPath)'=='' ">..\..\artifacts\obj\$(MSBuildProjectName)</BaseIntermediateOutputPath>
|
||||
<OutputPath Condition="'$(OutputPath)'=='' ">..\..\artifacts\bin\$(MSBuildProjectName)\</OutputPath>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup>
|
||||
<SchemaVersion>2.0</SchemaVersion>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VSToolsPath)\DNX\Microsoft.DNX.targets" Condition="'$(VSToolsPath)' != ''" />
|
||||
</Project>
|
||||
|
|
@ -1,10 +0,0 @@
|
|||
// Copyright (c) .NET Foundation. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System.Reflection;
|
||||
using System.Resources;
|
||||
using System.Runtime.CompilerServices;
|
||||
|
||||
[assembly: InternalsVisibleTo("Microsoft.Extensions.WebEncoders.Tests")]
|
||||
[assembly: AssemblyMetadata("Serviceable", "True")]
|
||||
[assembly: NeutralResourcesLanguage("en-us")]
|
||||
|
|
@ -1,301 +0,0 @@
|
|||
// Copyright (c) .NET Foundation. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
using System.IO;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Text;
|
||||
|
||||
namespace Microsoft.Extensions.WebEncoders
|
||||
{
|
||||
internal unsafe abstract class UnicodeEncoderBase
|
||||
{
|
||||
// A bitmap of characters which are allowed to be returned unescaped.
|
||||
private AllowedCharsBitmap _allowedCharsBitmap;
|
||||
|
||||
// The worst-case number of output chars generated for any input char.
|
||||
private readonly int _maxOutputCharsPerInputChar;
|
||||
|
||||
/// <summary>
|
||||
/// Instantiates an encoder using a custom allow list of characters.
|
||||
/// </summary>
|
||||
protected UnicodeEncoderBase(CodePointFilter filter, int maxOutputCharsPerInputChar)
{
    _allowedCharsBitmap = filter.GetAllowedCharsBitmap();
    _maxOutputCharsPerInputChar = maxOutputCharsPerInputChar;

    // Always escape the characters that are special in HTML, whatever the filter allows.
    // This encoder is shared by the URL and JavaScript string encoders too, and developers
    // commonly forget to HTML-encode a string after URL-encoding or JavaScript-escaping it,
    // so forbidding these here offers extra protection. The apostrophe and the quotation
    // mark can be used to escape attributes; '+' is not HTML-specific, but it can be used
    // to perform UTF7-based attacks.
    foreach (char alwaysEscaped in "<>&'\"+")
    {
        ForbidCharacter(alwaysEscaped);
    }

    // Forbid codepoints which aren't mapped to characters or which are otherwise always disallowed
    // (includes categories Cc, Cs, Co, Cn, Zs [except U+0020 SPACE], Zl, Zp)
    _allowedCharsBitmap.ForbidUndefinedCharacters();
}
|
||||
|
||||
// Marks a character as forbidden (must be returned encoded)
|
||||
// Removes the character from the allow list so that it is always returned encoded.
protected void ForbidCharacter(char c) => _allowedCharsBitmap.ForbidCharacter(c);
|
||||
|
||||
/// <summary>
|
||||
/// Entry point to the encoder.
|
||||
/// </summary>
|
||||
public void Encode(char[] value, int startIndex, int charCount, TextWriter output)
{
    // Argument validation
    if (value == null)
    {
        throw new ArgumentNullException(nameof(value));
    }
    if (output == null)
    {
        throw new ArgumentNullException(nameof(output));
    }
    ValidateInputs(startIndex, charCount, actualInputLength: value.Length);

    // An empty range produces no output.
    if (charCount == 0)
    {
        return;
    }

    fixed (char* pChars = value)
    {
        char* pRangeStart = &pChars[startIndex];
        int safePrefixLength = GetIndexOfFirstCharWhichRequiresEncoding(pRangeStart, charCount);

        if (safePrefixLength < 0)
        {
            // Nothing in the range needs encoding, so copy the range through unchanged.
            output.Write(value, startIndex, charCount);
            return;
        }

        // Copy the leading run of characters known to be valid in a single call...
        if (safePrefixLength > 0)
        {
            output.Write(value, startIndex, safePrefixLength);
        }

        // ...then encode the remainder individually.
        EncodeCore(pRangeStart + safePrefixLength, (uint)(charCount - safePrefixLength), output);
    }
}
|
||||
|
||||
/// <summary>
|
||||
/// Entry point to the encoder.
|
||||
/// </summary>
|
||||
public string Encode(string value)
{
    // Null and empty inputs are returned unchanged.
    if (String.IsNullOrEmpty(value))
    {
        return value;
    }

    // Skip over the leading run of characters that are allowed as-is.
    int scanIndex = 0;
    while (scanIndex < value.Length && IsCharacterAllowed(value[scanIndex]))
    {
        scanIndex++;
    }

    // If the scan reached the end, nothing needs encoding and the input itself is
    // returned. Otherwise encoding starts at the first character that is not allowed.
    return (scanIndex == value.Length)
        ? value
        : EncodeCore(value, idxOfFirstCharWhichRequiresEncoding: scanIndex);
}
|
||||
|
||||
/// <summary>
/// Encodes <paramref name="charCount"/> characters of the string <paramref name="value"/>,
/// beginning at <paramref name="startIndex"/>, and writes the result to <paramref name="output"/>.
/// </summary>
public void Encode(string value, int startIndex, int charCount, TextWriter output)
{
    // Argument validation
    if (value == null)
    {
        throw new ArgumentNullException(nameof(value));
    }
    if (output == null)
    {
        throw new ArgumentNullException(nameof(output));
    }
    ValidateInputs(startIndex, charCount, actualInputLength: value.Length);

    if (charCount == 0)
    {
        // Empty slice: nothing is written.
        return;
    }

    fixed (char* pText = value)
    {
        if (charCount != value.Length)
        {
            // A proper substring was requested, so there is no whole-string shortcut;
            // every character goes through the per-character encoding loop.
            EncodeCore(pText + startIndex, (uint)charCount, output);
            return;
        }

        // Common case: the entire string is being encoded. If every character is safe
        // the original string instance can be written directly.
        int firstUnsafeOffset = GetIndexOfFirstCharWhichRequiresEncoding(pText, charCount);
        if (firstUnsafeOffset < 0)
        {
            output.Write(value);
            return;
        }

        // Emit the safe prefix one character at a time, then encode the remainder.
        char* pFirstUnsafe = pText + firstUnsafeOffset;
        for (char* pCursor = pText; pCursor < pFirstUnsafe; pCursor++)
        {
            output.Write(*pCursor);
        }
        EncodeCore(pFirstUnsafe, (uint)(charCount - firstUnsafeOffset), output);
    }
}
|
||||
|
||||
// Builds the encoded form of 'input'. The characters before
// 'idxOfFirstCharWhichRequiresEncoding' are known to be safe and are copied verbatim;
// the rest of the string is run through the encoding loop.
private string EncodeCore(string input, int idxOfFirstCharWhichRequiresEncoding)
{
    Debug.Assert(idxOfFirstCharWhichRequiresEncoding >= 0);
    Debug.Assert(idxOfFirstCharWhichRequiresEncoding < input.Length);

    int safePrefixLength = idxOfFirstCharWhichRequiresEncoding;
    int tailLength = input.Length - safePrefixLength;

    // Capacity = verbatim prefix + estimated room for the encoded tail (overflow-checked addition).
    int tailCapacity = EncoderCommon.GetCapacityOfOutputStringBuilder(tailLength, _maxOutputCharsPerInputChar);
    int totalCapacity = checked(safePrefixLength + tailCapacity);
    Debug.Assert(totalCapacity >= input.Length);

    // Seed the builder with the safe prefix, then append the encoded tail through a Writer.
    var result = new StringBuilder(input, 0, safePrefixLength, totalCapacity);
    var sink = new Writer(result);
    fixed (char* pInput = input)
    {
        EncodeCore(ref sink, pInput + safePrefixLength, (uint)tailLength);
    }

    return result.ToString();
}
|
||||
|
||||
// Wraps the TextWriter in a Writer and forwards to the shared encoding loop.
private void EncodeCore(char* input, uint charsRemaining, TextWriter output)
{
    var sink = new Writer(output);
    EncodeCore(ref sink, input, charsRemaining);
}
|
||||
|
||||
// The shared encoding loop: reads one scalar value at a time from the UTF-16 input and
// writes it to 'writer', either verbatim (allowed BMP character) or encoded.
private void EncodeCore(ref Writer writer, char* input, uint charsRemaining)
{
    while (charsRemaining != 0)
    {
        bool atLastChar = (charsRemaining == 1);
        int scalar = UnicodeHelpers.GetScalarValueFromUtf16(input, endOfString: atLastChar);

        if (!UnicodeHelpers.IsSupplementaryCodePoint(scalar))
        {
            // BMP character: consumes exactly one UTF-16 code unit.
            input += 1;
            charsRemaining -= 1;

            char bmpChar = (char)scalar;
            if (!IsCharacterAllowed(bmpChar))
            {
                WriteEncodedScalar(ref writer, (uint)scalar);
            }
            else
            {
                writer.Write(bmpChar);
            }
            continue;
        }

        // Supplementary characters are always written in encoded form, and each one
        // consumes two UTF-16 code units (a surrogate pair) from the input.
        WriteEncodedScalar(ref writer, (uint)scalar);
        input += 2;
        charsRemaining -= 2;
    }
}
|
||||
|
||||
// Scans 'inputLength' characters starting at 'input' and returns the offset of the first
// one that is not allowed, or -1 when every character can be emitted unencoded.
private int GetIndexOfFirstCharWhichRequiresEncoding(char* input, int inputLength)
{
    int offset = 0;
    while (offset < inputLength)
    {
        if (IsCharacterAllowed(input[offset]))
        {
            offset++;
            continue;
        }
        return offset;
    }

    // Reached the end without finding a character that needs encoding.
    return -1;
}
|
||||
|
||||
// Returns true when the character may be written to the output unencoded,
// as decided by the allowed-characters bitmap.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private bool IsCharacterAllowed(char c) => _allowedCharsBitmap.IsCharacterAllowed(c);
|
||||
|
||||
// Verifies that [startIndex, startIndex + charCount) lies within an input of length
// 'actualInputLength'; throws ArgumentOutOfRangeException naming the offending argument.
private static void ValidateInputs(int startIndex, int charCount, int actualInputLength)
{
    bool startIsInRange = (startIndex >= 0) && (startIndex <= actualInputLength);
    if (!startIsInRange)
    {
        throw new ArgumentOutOfRangeException(nameof(startIndex));
    }

    // Number of characters available from startIndex to the end of the input.
    int maxCharCount = actualInputLength - startIndex;
    bool countIsInRange = (charCount >= 0) && (charCount <= maxCharCount);
    if (!countIsInRange)
    {
        throw new ArgumentOutOfRangeException(nameof(charCount));
    }
}
|
||||
|
||||
/// <summary>
/// Writes the encoded form of the scalar <paramref name="value"/> to <paramref name="writer"/>.
/// Implemented by derived encoders. The encoding loop calls this for every supplementary
/// code point and for every BMP character that is not allowed to pass through unencoded.
/// </summary>
protected abstract void WriteEncodedScalar(ref Writer writer, uint value);
|
||||
|
||||
/// <summary>
/// A single write target that forwards to either a StringBuilder or a TextWriter,
/// whichever it was constructed with. It is a struct so that it can live on the
/// stack and be passed by reference, which eliminates chatty virtual dispatches
/// on hot paths.
/// </summary>
protected struct Writer
{
    // Exactly one of these two fields is non-null, depending on the constructor used.
    private readonly StringBuilder _builder;
    private readonly TextWriter _textWriter;

    public Writer(StringBuilder innerBuilder)
    {
        _textWriter = null;
        _builder = innerBuilder;
    }

    public Writer(TextWriter innerWriter)
    {
        _textWriter = innerWriter;
        _builder = null;
    }

    // Appends a single character to whichever target is in use.
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public void Write(char value)
    {
        if (_builder == null)
        {
            _textWriter.Write(value);
            return;
        }
        _builder.Append(value);
    }

    // Appends a string to whichever target is in use.
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public void Write(string value)
    {
        if (_builder == null)
        {
            _textWriter.Write(value);
            return;
        }
        _builder.Append(value);
    }
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,231 +0,0 @@
|
|||
// Copyright (c) .NET Foundation. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
using System.Reflection;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Threading;
|
||||
|
||||
namespace Microsoft.Extensions.WebEncoders
|
||||
{
|
||||
/// <summary>
/// Contains helpers for dealing with Unicode code points.
/// </summary>
internal unsafe static class UnicodeHelpers
{
    /// <summary>
    /// Used for invalid Unicode sequences or other unrepresentable values.
    /// </summary>
    private const char UNICODE_REPLACEMENT_CHAR = '\uFFFD';

    /// <summary>
    /// The last code point defined by the Unicode specification.
    /// </summary>
    internal const int UNICODE_LAST_CODEPOINT = 0x10FFFF;

    // Size of the embedded bitmap resource: one bit per BMP code point (0x10000 bits = 8 KB).
    private const int DefinedCharacterBitmapSizeInBytes = 8 * 1024;

    // Lazily created by CreateDefinedCharacterBitmap; read and written with Volatile.
    private static uint[] _definedCharacterBitmap;

    /// <summary>
    /// Helper method which creates a bitmap of all characters which are
    /// defined per version 8.0 of the Unicode specification.
    /// </summary>
    /// <remarks>
    /// The bitmap is loaded from an embedded resource. If the resource is missing, has an
    /// unexpected size, or cannot be read in full, the process is terminated via
    /// <see cref="Environment.FailFast(string)"/>.
    /// </remarks>
    [MethodImpl(MethodImplOptions.NoInlining)]
    private static uint[] CreateDefinedCharacterBitmap()
    {
        var assembly = typeof(UnicodeHelpers).GetTypeInfo().Assembly;
        var resourceName = assembly.GetName().Name + ".compiler.resources.unicode-defined-chars.bin";

        byte[] rawData = new byte[DefinedCharacterBitmapSizeInBytes];

        // Dispose the resource stream once the bytes have been copied out.
        using (var stream = assembly.GetManifestResourceStream(resourceName))
        {
            // GetManifestResourceStream returns null when the resource does not exist;
            // treat that the same as a resource of the wrong size.
            // The stream should be exactly 8KB in size.
            if (stream == null || stream.Length != DefinedCharacterBitmapSizeInBytes)
            {
                Environment.FailFast("Corrupt data detected.");
            }

            // Read everything in as raw bytes. Stream.Read may return fewer bytes than
            // requested, so loop until the buffer is full.
            for (int numBytesReadTotal = 0; numBytesReadTotal < rawData.Length;)
            {
                int numBytesReadThisIteration = stream.Read(rawData, numBytesReadTotal, rawData.Length - numBytesReadTotal);
                if (numBytesReadThisIteration == 0)
                {
                    Environment.FailFast("Corrupt data detected.");
                }
                numBytesReadTotal += numBytesReadThisIteration;
            }
        }

        // Finally, convert the byte[] to a uint[].
        // The incoming bytes are little-endian.
        uint[] retVal = new uint[DefinedCharacterBitmapSizeInBytes / sizeof(uint)];
        for (int i = 0; i < retVal.Length; i++)
        {
            retVal[i] = (((uint)rawData[4 * i + 3]) << 24)
                | (((uint)rawData[4 * i + 2]) << 16)
                | (((uint)rawData[4 * i + 1]) << 8)
                | (uint)rawData[4 * i];
        }

        // And we're done!
        Volatile.Write(ref _definedCharacterBitmap, retVal);
        return retVal;
    }

    /// <summary>
    /// Returns a bitmap of all characters which are defined per version 8.0
    /// of the Unicode specification. The bitmap is created on first use.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal static uint[] GetDefinedCharacterBitmap()
    {
        return Volatile.Read(ref _definedCharacterBitmap) ?? CreateDefinedCharacterBitmap();
    }

    /// <summary>
    /// Given a UTF-16 character stream, reads the next scalar value from the stream.
    /// Set 'endOfString' to true if 'pChar' points to the last character in the stream.
    /// </summary>
    /// <returns>
    /// The scalar value, or U+FFFD if the stream contains an unmatched surrogate at this position.
    /// </returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal static int GetScalarValueFromUtf16(char* pChar, bool endOfString)
    {
        // This method is marked as AggressiveInlining to handle the common case of a non-surrogate
        // character. The surrogate case is handled in the slower fallback code path.
        char thisChar = *pChar;
        return (Char.IsSurrogate(thisChar)) ? GetScalarValueFromUtf16Slow(pChar, endOfString) : thisChar;
    }

    // Fallback for GetScalarValueFromUtf16 when the current character is a surrogate.
    // Returns the combined code point for a valid high/low pair and U+FFFD otherwise.
    private static int GetScalarValueFromUtf16Slow(char* pChar, bool endOfString)
    {
        char firstChar = pChar[0];

        if (!Char.IsSurrogate(firstChar))
        {
            Debug.Fail("This case should've been handled by the fast path.");
            return firstChar;
        }
        else if (Char.IsHighSurrogate(firstChar))
        {
            if (endOfString)
            {
                // unmatched surrogate - substitute
                // (pChar[1] must not be read here: it would be past the end of the stream)
                return UNICODE_REPLACEMENT_CHAR;
            }
            else
            {
                char secondChar = pChar[1];
                if (Char.IsLowSurrogate(secondChar))
                {
                    // valid surrogate pair - extract codepoint
                    return GetScalarValueFromUtf16SurrogatePair(firstChar, secondChar);
                }
                else
                {
                    // unmatched surrogate - substitute
                    return UNICODE_REPLACEMENT_CHAR;
                }
            }
        }
        else
        {
            // unmatched surrogate - substitute
            Debug.Assert(Char.IsLowSurrogate(firstChar));
            return UNICODE_REPLACEMENT_CHAR;
        }
    }

    // Combines a high and a low surrogate into the supplementary code point they represent.
    private static int GetScalarValueFromUtf16SurrogatePair(char highSurrogate, char lowSurrogate)
    {
        Debug.Assert(Char.IsHighSurrogate(highSurrogate));
        Debug.Assert(Char.IsLowSurrogate(lowSurrogate));

        // See http://www.unicode.org/versions/Unicode6.2.0/ch03.pdf, Table 3.5 for the
        // details of this conversion. We don't use Char.ConvertToUtf32 because its exception
        // handling shows up on the hot path, and our caller has already sanitized the inputs.
        return (lowSurrogate & 0x3ff) | (((highSurrogate & 0x3ff) + (1 << 6)) << 10);
    }

    /// <summary>
    /// Splits a supplementary scalar value (U+10000..U+10FFFF) into its UTF-16 surrogate pair.
    /// </summary>
    internal static void GetUtf16SurrogatePairFromAstralScalarValue(int scalar, out char highSurrogate, out char lowSurrogate)
    {
        Debug.Assert(0x10000 <= scalar && scalar <= UNICODE_LAST_CODEPOINT);

        // See http://www.unicode.org/versions/Unicode6.2.0/ch03.pdf, Table 3.5 for the
        // details of this conversion. We don't use Char.ConvertFromUtf32 because its exception
        // handling shows up on the hot path, it allocates temporary strings (which we don't want),
        // and our caller has already sanitized the inputs.

        int x = scalar & 0xFFFF;
        int u = scalar >> 16;
        int w = u - 1;
        highSurrogate = (char)(0xD800 | (w << 6) | (x >> 10));
        lowSurrogate = (char)(0xDC00 | (x & 0x3FF));
    }

    /// <summary>
    /// Given a Unicode scalar value, returns the UTF-8 representation of the value.
    /// The return value's bytes should be popped from the LSB.
    /// </summary>
    /// <remarks>
    /// The first UTF-8 byte occupies the lowest 8 bits of the result, the second byte the
    /// next 8 bits, and so on. For four-byte sequences the top bit of the result is set,
    /// so the returned <see cref="int"/> is negative.
    /// </remarks>
    internal static int GetUtf8RepresentationForScalarValue(uint scalar)
    {
        Debug.Assert(scalar <= UNICODE_LAST_CODEPOINT);

        // See http://www.unicode.org/versions/Unicode6.2.0/ch03.pdf, Table 3.6 for the
        // details of this conversion. We don't use UTF8Encoding since we're encoding
        // a scalar code point, not a UTF16 character sequence.
        if (scalar <= 0x7f)
        {
            // one byte used: scalar 00000000 0xxxxxxx -> byte sequence 0xxxxxxx
            byte firstByte = (byte)scalar;
            return firstByte;
        }
        else if (scalar <= 0x7ff)
        {
            // two bytes used: scalar 00000yyy yyxxxxxx -> byte sequence 110yyyyy 10xxxxxx
            byte firstByte = (byte)(0xc0 | (scalar >> 6));
            byte secondByte = (byte)(0x80 | (scalar & 0x3f));
            return ((secondByte << 8) | firstByte);
        }
        else if (scalar <= 0xffff)
        {
            // three bytes used: scalar zzzzyyyy yyxxxxxx -> byte sequence 1110zzzz 10yyyyyy 10xxxxxx
            byte firstByte = (byte)(0xe0 | (scalar >> 12));
            byte secondByte = (byte)(0x80 | ((scalar >> 6) & 0x3f));
            byte thirdByte = (byte)(0x80 | (scalar & 0x3f));
            return ((((thirdByte << 8) | secondByte) << 8) | firstByte);
        }
        else
        {
            // four bytes used: scalar 000uuuuu zzzzyyyy yyxxxxxx -> byte sequence 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx
            byte firstByte = (byte)(0xf0 | (scalar >> 18));
            byte secondByte = (byte)(0x80 | ((scalar >> 12) & 0x3f));
            byte thirdByte = (byte)(0x80 | ((scalar >> 6) & 0x3f));
            byte fourthByte = (byte)(0x80 | (scalar & 0x3f));
            return ((((((fourthByte << 8) | thirdByte) << 8) | secondByte) << 8) | firstByte);
        }
    }

    /// <summary>
    /// Returns a value stating whether a character is defined per version 8.0
    /// of the Unicode specification. Certain classes of characters (control chars,
    /// private use, surrogates, some whitespace) are considered "undefined" for
    /// our purposes.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal static bool IsCharacterDefined(char c)
    {
        // Each uint in the bitmap covers 32 consecutive code points:
        // the upper bits select the uint, the low 5 bits select the bit within it.
        uint codePoint = (uint)c;
        int index = (int)(codePoint >> 5);
        int offset = (int)(codePoint & 0x1FU);
        return ((GetDefinedCharacterBitmap()[index] >> offset) & 0x1U) != 0;
    }

    /// <summary>
    /// Determines whether the given scalar value is in the supplementary plane and thus
    /// requires 2 characters to be represented in UTF-16 (as a surrogate pair).
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal static bool IsSupplementaryCodePoint(int scalar)
    {
        // True when any bit above the 16 bits of a char is set.
        return ((scalar & ~((int)Char.MaxValue)) != 0);
    }
}
|
||||
}
|
||||
|
|
@ -1,64 +0,0 @@
|
|||
// Copyright (c) .NET Foundation. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System;
|
||||
|
||||
namespace Microsoft.Extensions.WebEncoders
|
||||
{
|
||||
/// <summary>
/// A contiguous run of Unicode code points, described by its first code point and its size.
/// </summary>
/// <remarks>
/// Only ranges that lie entirely within the Basic Multilingual Plane are currently supported.
/// </remarks>
public sealed class UnicodeRange
{
    /// <summary>
    /// Initializes a new <see cref="UnicodeRange"/>.
    /// </summary>
    /// <param name="firstCodePoint">The first code point in the range.</param>
    /// <param name="rangeSize">The number of code points in the range.</param>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="firstCodePoint"/> is outside U+0000..U+FFFF, or <paramref name="rangeSize"/>
    /// is negative or would extend the range beyond U+FFFF.
    /// </exception>
    public UnicodeRange(int firstCodePoint, int rangeSize)
    {
        // Both ends of the range must fall inside the BMP.
        // See http://unicode.org/faq/blocks_ranges.html for more info.
        bool startsInBmp = (firstCodePoint >= 0) && (firstCodePoint <= 0xFFFF);
        if (!startsInBmp)
        {
            throw new ArgumentOutOfRangeException(nameof(firstCodePoint));
        }

        // Computed as a long so that the addition itself cannot overflow.
        long endExclusive = (long)firstCodePoint + (long)rangeSize;
        if (rangeSize < 0 || endExclusive > 0x10000)
        {
            throw new ArgumentOutOfRangeException(nameof(rangeSize));
        }

        RangeSize = rangeSize;
        FirstCodePoint = firstCodePoint;
    }

    /// <summary>
    /// Gets the first code point in this range.
    /// </summary>
    public int FirstCodePoint { get; }

    /// <summary>
    /// Gets the number of code points in this range.
    /// </summary>
    public int RangeSize { get; }

    /// <summary>
    /// Builds a <see cref="UnicodeRange"/> covering every character from
    /// <paramref name="firstChar"/> through <paramref name="lastChar"/>, inclusive.
    /// </summary>
    /// <param name="firstChar">The first character in the range.</param>
    /// <param name="lastChar">The last character in the range.</param>
    /// <returns>The <see cref="UnicodeRange"/> representing this span.</returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="lastChar"/> precedes <paramref name="firstChar"/>.
    /// </exception>
    public static UnicodeRange FromSpan(char firstChar, char lastChar)
    {
        if (firstChar > lastChar)
        {
            throw new ArgumentOutOfRangeException(nameof(lastChar));
        }

        // Both endpoints are included, hence the +1.
        int size = (lastChar - firstChar) + 1;
        return new UnicodeRange(firstChar, size);
    }
}
|
||||
}
|
||||
|
|
@ -1,51 +0,0 @@
|
|||
// Copyright (c) .NET Foundation. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Threading;
|
||||
|
||||
namespace Microsoft.Extensions.WebEncoders
|
||||
{
|
||||
/// <summary>
/// Provides predefined <see cref="UnicodeRange"/> instances which correspond to blocks
/// from the Unicode 8.0 specification. Each instance is created on first access and cached.
/// </summary>
public static partial class UnicodeRanges
{
    // Backing fields for the lazily created ranges below.
    private static UnicodeRange _none;
    private static UnicodeRange _all;

    /// <summary>
    /// An empty <see cref="UnicodeRange"/>. This range contains no code points.
    /// </summary>
    public static UnicodeRange None
    {
        get { return Volatile.Read(ref _none) ?? CreateEmptyRange(ref _none); }
    }

    /// <summary>
    /// A <see cref="UnicodeRange"/> which contains all characters in the Unicode Basic
    /// Multilingual Plane (U+0000..U+FFFF).
    /// </summary>
    public static UnicodeRange All
    {
        get { return Volatile.Read(ref _all) ?? CreateRange(ref _all, '\u0000', '\uFFFF'); }
    }

    // Slow path for 'None': creates the empty range and publishes it into the cache field.
    // Two racing threads may each create an instance; the later write wins, which is fine.
    [MethodImpl(MethodImplOptions.NoInlining)] // the caller should be inlined, not this method
    private static UnicodeRange CreateEmptyRange(ref UnicodeRange range)
    {
        var emptyRange = new UnicodeRange(0, 0);
        Volatile.Write(ref range, emptyRange);
        return emptyRange;
    }

    // Slow path for the range properties: creates the range [first, last] and publishes it
    // into the cache field. Two racing threads may each create an instance; the later write
    // wins, which is fine.
    [MethodImpl(MethodImplOptions.NoInlining)] // the caller should be inlined, not this method
    private static UnicodeRange CreateRange(ref UnicodeRange range, char first, char last)
    {
        Debug.Assert(last > first, "Code points were specified out of order.");

        var createdRange = UnicodeRange.FromSpan(first, last);
        Volatile.Write(ref range, createdRange);
        return createdRange;
    }
}
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -1,245 +0,0 @@
|
|||
// Copyright (c) .NET Foundation. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
using System.IO;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Threading;
|
||||
|
||||
namespace Microsoft.Extensions.WebEncoders
|
||||
{
|
||||
/// <summary>
/// A class which can perform URL string escaping given an allow list of characters which
/// can be represented unescaped.
/// </summary>
/// <remarks>
/// Instances of this type will always encode a certain set of characters (such as +
/// and ?), even if the filter provided in the constructor allows such characters.
/// Once constructed, instances of this class are thread-safe for multiple callers.
/// </remarks>
public sealed class UrlEncoder : IUrlEncoder
{
    // The default URL string encoder (Basic Latin), instantiated on demand
    private static UrlEncoder _defaultEncoder;

    // The inner encoder, responsible for the actual encoding routines
    private readonly UrlUnicodeEncoder _innerUnicodeEncoder;

    /// <summary>
    /// Instantiates an encoder using <see cref="UnicodeRanges.BasicLatin"/> as its allow list.
    /// Any character not in the <see cref="UnicodeRanges.BasicLatin"/> range will be escaped.
    /// </summary>
    public UrlEncoder()
        : this(UrlUnicodeEncoder.BasicLatin)
    {
    }

    /// <summary>
    /// Instantiates an encoder specifying which Unicode character ranges are allowed to
    /// pass through the encoder unescaped. Any character not in the set of ranges specified
    /// by <paramref name="allowedRanges"/> will be escaped.
    /// </summary>
    public UrlEncoder(params UnicodeRange[] allowedRanges)
        : this(new UrlUnicodeEncoder(new CodePointFilter(allowedRanges)))
    {
    }

    /// <summary>
    /// Instantiates an encoder using a custom code point filter. Any character not in the
    /// set returned by <paramref name="filter"/>'s <see cref="ICodePointFilter.GetAllowedCodePoints"/>
    /// method will be escaped.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="filter"/> is null.</exception>
    public UrlEncoder(ICodePointFilter filter)
        : this(new UrlUnicodeEncoder(CodePointFilter.Wrap(EnsureNotNull(filter))))
    {
        // The null check lives in the constructor initializer (via EnsureNotNull) because the
        // initializer runs before this body; a check placed here would only execute after
        // 'filter' had already been used.
    }

    private UrlEncoder(UrlUnicodeEncoder innerEncoder)
    {
        Debug.Assert(innerEncoder != null);
        _innerUnicodeEncoder = innerEncoder;
    }

    // Returns 'filter' unchanged, or throws ArgumentNullException if it is null.
    private static ICodePointFilter EnsureNotNull(ICodePointFilter filter)
    {
        if (filter == null)
        {
            throw new ArgumentNullException(nameof(filter));
        }
        return filter;
    }

    /// <summary>
    /// A default instance of <see cref="UrlEncoder"/>.
    /// </summary>
    /// <remarks>
    /// This normally corresponds to <see cref="UnicodeRanges.BasicLatin"/>. However, this property is
    /// settable so that a developer can change the default implementation application-wide.
    /// </remarks>
    /// <exception cref="ArgumentNullException">The property is set to null.</exception>
    public static UrlEncoder Default
    {
        get
        {
            return Volatile.Read(ref _defaultEncoder) ?? CreateDefaultEncoderSlow();
        }
        set
        {
            if (value == null)
            {
                throw new ArgumentNullException(nameof(value));
            }
            Volatile.Write(ref _defaultEncoder, value);
        }
    }

    // Creates the default encoder on first use. If another thread has installed an encoder
    // in the meantime, that instance is returned instead of the newly created one.
    [MethodImpl(MethodImplOptions.NoInlining)] // the JITter can attempt to inline the caller itself without worrying about us
    private static UrlEncoder CreateDefaultEncoderSlow()
    {
        var onDemandEncoder = new UrlEncoder();
        return Interlocked.CompareExchange(ref _defaultEncoder, onDemandEncoder, null) ?? onDemandEncoder;
    }

    /// <summary>
    /// URL-escapes <paramref name="charCount"/> characters of the array <paramref name="value"/>,
    /// beginning at <paramref name="startIndex"/>, and writes the result to <paramref name="output"/>.
    /// </summary>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="value"/> or <paramref name="output"/> is null.
    /// </exception>
    public void UrlEncode(char[] value, int startIndex, int charCount, TextWriter output)
    {
        if (value == null)
        {
            throw new ArgumentNullException(nameof(value));
        }

        if (output == null)
        {
            throw new ArgumentNullException(nameof(output));
        }

        _innerUnicodeEncoder.Encode(value, startIndex, charCount, output);
    }

    /// <summary>
    /// URL-escapes <paramref name="value"/> and returns the escaped string.
    /// </summary>
    public string UrlEncode(string value)
    {
        return _innerUnicodeEncoder.Encode(value);
    }

    /// <summary>
    /// URL-escapes <paramref name="charCount"/> characters of the string <paramref name="value"/>,
    /// beginning at <paramref name="startIndex"/>, and writes the result to <paramref name="output"/>.
    /// </summary>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="value"/> or <paramref name="output"/> is null.
    /// </exception>
    public void UrlEncode(string value, int startIndex, int charCount, TextWriter output)
    {
        if (value == null)
        {
            throw new ArgumentNullException(nameof(value));
        }

        if (output == null)
        {
            throw new ArgumentNullException(nameof(output));
        }

        _innerUnicodeEncoder.Encode(value, startIndex, charCount, output);
    }

    // The encoder that does the actual work: a UnicodeEncoderBase whose escape format is
    // percent-encoded UTF-8 and whose forbidden set is extended with URL-significant characters.
    private sealed class UrlUnicodeEncoder : UnicodeEncoderBase
    {
        // A singleton instance of the basic latin encoder.
        private static UrlUnicodeEncoder _basicLatinSingleton;

        // We perform UTF8 conversion of input, which means that the worst case is
        // 9 output chars per input char: [input] U+FFFF -> [output] "%XX%YY%ZZ".
        // We don't need to worry about astral code points since they consume 2 input
        // chars to produce 12 output chars "%XX%YY%ZZ%WW", which is 6 output chars per input char.
        private const int MaxOutputCharsPerInputChar = 9;

        internal UrlUnicodeEncoder(CodePointFilter filter)
            : base(filter, MaxOutputCharsPerInputChar)
        {
            // Per RFC 3987, Sec. 2.2, we want encodings that are safe for
            // four particular components: 'isegment', 'ipath-noscheme',
            // 'iquery', and 'ifragment'. The relevant definitions are below.
            //
            //    ipath-noscheme = isegment-nz-nc *( "/" isegment )
            //
            //    isegment       = *ipchar
            //
            //    isegment-nz-nc = 1*( iunreserved / pct-encoded / sub-delims
            //                         / "@" )
            //                   ; non-zero-length segment without any colon ":"
            //
            //    ipchar         = iunreserved / pct-encoded / sub-delims / ":"
            //                   / "@"
            //
            //    iquery         = *( ipchar / iprivate / "/" / "?" )
            //
            //    ifragment      = *( ipchar / "/" / "?" )
            //
            //    iunreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~" / ucschar
            //
            //    ucschar        = %xA0-D7FF / %xF900-FDCF / %xFDF0-FFEF
            //                   / %x10000-1FFFD / %x20000-2FFFD / %x30000-3FFFD
            //                   / %x40000-4FFFD / %x50000-5FFFD / %x60000-6FFFD
            //                   / %x70000-7FFFD / %x80000-8FFFD / %x90000-9FFFD
            //                   / %xA0000-AFFFD / %xB0000-BFFFD / %xC0000-CFFFD
            //                   / %xD0000-DFFFD / %xE1000-EFFFD
            //
            //    pct-encoded    = "%" HEXDIG HEXDIG
            //
            //    sub-delims     = "!" / "$" / "&" / "'" / "(" / ")"
            //                   / "*" / "+" / "," / ";" / "="
            //
            // The only common characters between these four components are the
            // intersection of 'isegment-nz-nc' and 'ipchar', which is really
            // just 'isegment-nz-nc' (colons forbidden).
            //
            // From this list, the base encoder already forbids "&", "'", "+",
            // and we'll additionally forbid "=" since it has special meaning
            // in x-www-form-urlencoded representations.
            //
            // This means that the full list of allowed characters from the
            // Basic Latin set is:
            // ALPHA / DIGIT / "-" / "." / "_" / "~" / "!" / "$" / "(" / ")" / "*" / "," / ";" / "@"

            const string forbiddenChars = @" #%/:=?[\]^`{|}"; // chars from Basic Latin which aren't already disallowed by the base encoder
            foreach (char c in forbiddenChars)
            {
                ForbidCharacter(c);
            }

            // Specials (U+FFF0 .. U+FFFF) are forbidden by the definition of 'ucschar' above
            for (int i = 0; i < 16; i++)
            {
                ForbidCharacter((char)(0xFFF0 | i));
            }

            // Supplementary characters are forbidden anyway by the base encoder
        }

        // Returns the shared Basic Latin encoder, creating it on first use. Two racing
        // threads may each create an instance; either one is usable and the later write wins.
        internal static UrlUnicodeEncoder BasicLatin
        {
            get
            {
                UrlUnicodeEncoder encoder = Volatile.Read(ref _basicLatinSingleton);
                if (encoder == null)
                {
                    encoder = new UrlUnicodeEncoder(new CodePointFilter(UnicodeRanges.BasicLatin));
                    Volatile.Write(ref _basicLatinSingleton, encoder);
                }
                return encoder;
            }
        }

        // Writes a scalar value as a percent-encoded sequence of UTF8 bytes, per RFC 3987.
        protected override void WriteEncodedScalar(ref Writer writer, uint value)
        {
            // The UTF-8 bytes come back packed into one integer with the first byte in the
            // lowest 8 bits, so they are emitted by repeatedly taking the low byte and
            // shifting right. do/while guarantees at least one "%XX" is written, which is
            // what makes U+0000 (packed value 0) come out as "%00".
            uint asUtf8 = (uint)UnicodeHelpers.GetUtf8RepresentationForScalarValue(value);
            do
            {
                char highNibble, lowNibble;
                HexUtil.WriteHexEncodedByte((byte)asUtf8, out highNibble, out lowNibble);
                writer.Write('%');
                writer.Write(highNibble);
                writer.Write(lowNibble);
            } while ((asUtf8 >>= 8) != 0);
        }
    }
}
|
||||
}
|
||||
Binary file not shown.
|
|
@ -1,26 +0,0 @@
|
|||
{
|
||||
"version": "1.0.0-*",
|
||||
"description": "Contains core encoders for HTML, JavaScript strings, and URLs.",
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "git://github.com/aspnet/httpabstractions"
|
||||
},
|
||||
"compilationOptions": {
|
||||
"allowUnsafe": true,
|
||||
"warningsAsErrors": true
|
||||
},
|
||||
"frameworks": {
|
||||
"net451": {},
|
||||
"dotnet5.4": {
|
||||
"dependencies": {
|
||||
"System.ComponentModel": "4.0.1-beta-*",
|
||||
"System.Diagnostics.Debug": "4.0.11-beta-*",
|
||||
"System.IO": "4.0.11-beta-*",
|
||||
"System.Reflection": "4.0.10-*",
|
||||
"System.Resources.ResourceManager": "4.0.1-beta-*",
|
||||
"System.Runtime.Extensions": "4.0.11-beta-*",
|
||||
"System.Threading": "4.0.11-beta-*"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -2,6 +2,7 @@
|
|||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System;
|
||||
using System.Text.Encodings.Web;
|
||||
using Microsoft.Extensions.DependencyInjection.Extensions;
|
||||
using Microsoft.Extensions.OptionsModel;
|
||||
using Microsoft.Extensions.WebEncoders;
|
||||
|
|
@ -31,12 +32,12 @@ namespace Microsoft.Extensions.DependencyInjection
|
|||
|
||||
// Register the default encoders
|
||||
// We want to call the 'Default' property getters lazily since they perform static caching
|
||||
services.TryAdd(ServiceDescriptor.Singleton<IHtmlEncoder>(
|
||||
CreateFactory(() => HtmlEncoder.Default, filter => new HtmlEncoder(filter))));
|
||||
services.TryAdd(ServiceDescriptor.Singleton<IJavaScriptStringEncoder>(
|
||||
CreateFactory(() => JavaScriptStringEncoder.Default, filter => new JavaScriptStringEncoder(filter))));
|
||||
services.TryAdd(ServiceDescriptor.Singleton<IUrlEncoder>(
|
||||
CreateFactory(() => UrlEncoder.Default, filter => new UrlEncoder(filter))));
|
||||
services.TryAdd(ServiceDescriptor.Singleton<HtmlEncoder>(
|
||||
CreateFactory(() => HtmlEncoder.Default, settings => HtmlEncoder.Create(settings))));
|
||||
services.TryAdd(ServiceDescriptor.Singleton<JavaScriptEncoder>(
|
||||
CreateFactory(() => JavaScriptEncoder.Default, settings => JavaScriptEncoder.Create(settings))));
|
||||
services.TryAdd(ServiceDescriptor.Singleton<UrlEncoder>(
|
||||
CreateFactory(() => UrlEncoder.Default, settings => UrlEncoder.Create(settings))));
|
||||
|
||||
if (configureOptions != null)
|
||||
{
|
||||
|
|
@ -48,14 +49,14 @@ namespace Microsoft.Extensions.DependencyInjection
|
|||
|
||||
private static Func<IServiceProvider, T> CreateFactory<T>(
|
||||
Func<T> defaultFactory,
|
||||
Func<ICodePointFilter, T> customFilterFactory)
|
||||
Func<TextEncoderSettings, T> customSettingsFactory)
|
||||
{
|
||||
return serviceProvider =>
|
||||
{
|
||||
var codePointFilter = serviceProvider?.GetService<IOptions<WebEncoderOptions>>()?
|
||||
var settings = serviceProvider?.GetService<IOptions<WebEncoderOptions>>()?
|
||||
.Value?
|
||||
.CodePointFilter;
|
||||
return (codePointFilter != null) ? customFilterFactory(codePointFilter) : defaultFactory();
|
||||
.TextEncoderSettings;
|
||||
return (settings != null) ? customSettingsFactory(settings) : defaultFactory();
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System;
|
||||
using System.Text.Encodings.Web;
|
||||
|
||||
namespace Microsoft.Extensions.WebEncoders
|
||||
{
|
||||
|
|
@ -11,39 +12,39 @@ namespace Microsoft.Extensions.WebEncoders
|
|||
public static class EncoderServiceProviderExtensions
|
||||
{
|
||||
/// <summary>
|
||||
/// Retrieves an <see cref="IHtmlEncoder"/> from an <see cref="IServiceProvider"/>.
|
||||
/// Retrieves an <see cref="HtmlEncoder"/> from an <see cref="IServiceProvider"/>.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// This method is guaranteed never to return null.
|
||||
/// It will return a default encoder instance if <paramref name="serviceProvider"/> does not contain one or is null.
|
||||
/// </remarks>
|
||||
public static IHtmlEncoder GetHtmlEncoder(this IServiceProvider serviceProvider)
|
||||
public static HtmlEncoder GetHtmlEncoder(this IServiceProvider serviceProvider)
|
||||
{
|
||||
return (IHtmlEncoder)serviceProvider?.GetService(typeof(IHtmlEncoder)) ?? HtmlEncoder.Default;
|
||||
return (HtmlEncoder)serviceProvider?.GetService(typeof(HtmlEncoder)) ?? HtmlEncoder.Default;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Retrieves an <see cref="IJavaScriptStringEncoder"/> from an <see cref="IServiceProvider"/>.
|
||||
/// Retrieves a <see cref="JavaScriptEncoder"/> from an <see cref="IServiceProvider"/>.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// This method is guaranteed never to return null.
|
||||
/// It will return a default encoder instance if <paramref name="serviceProvider"/> does not contain one or is null.
|
||||
/// </remarks>
|
||||
public static IJavaScriptStringEncoder GetJavaScriptStringEncoder(this IServiceProvider serviceProvider)
|
||||
public static JavaScriptEncoder GetJavaScriptEncoder(this IServiceProvider serviceProvider)
|
||||
{
|
||||
return (IJavaScriptStringEncoder)serviceProvider?.GetService(typeof(IJavaScriptStringEncoder)) ?? JavaScriptStringEncoder.Default;
|
||||
return (JavaScriptEncoder)serviceProvider?.GetService(typeof(JavaScriptEncoder)) ?? JavaScriptEncoder.Default;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Retrieves an <see cref="IUrlEncoder"/> from an <see cref="IServiceProvider"/>.
|
||||
/// Retrieves a <see cref="UrlEncoder"/> from an <see cref="IServiceProvider"/>.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// This method is guaranteed never to return null.
|
||||
/// It will return a default encoder instance if <paramref name="serviceProvider"/> does not contain one or is null.
|
||||
/// </remarks>
|
||||
public static IUrlEncoder GetUrlEncoder(this IServiceProvider serviceProvider)
|
||||
public static UrlEncoder GetUrlEncoder(this IServiceProvider serviceProvider)
|
||||
{
|
||||
return (IUrlEncoder)serviceProvider?.GetService(typeof(IUrlEncoder)) ?? UrlEncoder.Default;
|
||||
return (UrlEncoder)serviceProvider?.GetService(typeof(UrlEncoder)) ?? UrlEncoder.Default;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,54 @@
|
|||
// Copyright (c) .NET Foundation. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System.IO;
|
||||
using System.Text.Encodings.Web;
|
||||
|
||||
namespace Microsoft.Extensions.WebEncoders.Testing
|
||||
{
|
||||
/// <summary>
/// Encoder used for unit testing. It does not perform real HTML encoding; it wraps the
/// input in the marker <c>HtmlEncode[[...]]</c> so tests can see that encoding was requested.
/// </summary>
public sealed class HtmlTestEncoder : HtmlEncoder
{
    /// <summary>
    /// Always reports 1.
    /// </summary>
    public override int MaxOutputCharactersPerInputCharacter
    {
        get { return 1; }
    }

    /// <summary>
    /// Returns <c>HtmlEncode[[value]]</c>.
    /// </summary>
    /// <param name="value">The text to wrap.</param>
    /// <returns>The wrapped text.</returns>
    public override string Encode(string value)
    {
        return $"HtmlEncode[[{value}]]";
    }

    /// <summary>
    /// Writes <c>HtmlEncode[[</c>, the requested slice of <paramref name="value"/>, then <c>]]</c>.
    /// </summary>
    /// <param name="output">The writer that receives the wrapped text.</param>
    /// <param name="value">The source characters.</param>
    /// <param name="startIndex">Index of the first character to write.</param>
    /// <param name="characterCount">Number of characters to write.</param>
    /// <exception cref="System.ArgumentNullException"><paramref name="output"/> or <paramref name="value"/> is null.</exception>
    /// <exception cref="System.ArgumentOutOfRangeException">The requested slice lies outside <paramref name="value"/>.</exception>
    public override void Encode(TextWriter output, char[] value, int startIndex, int characterCount)
    {
        // Validate everything up front so a bad call never leaves a dangling prefix in the writer.
        if (output == null)
        {
            throw new System.ArgumentNullException(nameof(output));
        }
        if (value == null)
        {
            throw new System.ArgumentNullException(nameof(value));
        }
        if (startIndex < 0 || startIndex > value.Length)
        {
            throw new System.ArgumentOutOfRangeException(nameof(startIndex));
        }
        if (characterCount < 0 || characterCount > value.Length - startIndex)
        {
            throw new System.ArgumentOutOfRangeException(nameof(characterCount));
        }

        output.Write("HtmlEncode[[");
        output.Write(value, startIndex, characterCount);
        output.Write("]]");
    }

    /// <summary>
    /// Writes <c>HtmlEncode[[</c>, the requested substring of <paramref name="value"/>, then <c>]]</c>.
    /// </summary>
    /// <param name="output">The writer that receives the wrapped text.</param>
    /// <param name="value">The source string.</param>
    /// <param name="startIndex">Index of the first character to write.</param>
    /// <param name="characterCount">Number of characters to write.</param>
    /// <exception cref="System.ArgumentNullException"><paramref name="output"/> or <paramref name="value"/> is null.</exception>
    /// <exception cref="System.ArgumentOutOfRangeException">The requested substring lies outside <paramref name="value"/>.</exception>
    public override void Encode(TextWriter output, string value, int startIndex, int characterCount)
    {
        if (output == null)
        {
            throw new System.ArgumentNullException(nameof(output));
        }
        if (value == null)
        {
            throw new System.ArgumentNullException(nameof(value));
        }

        // Take the substring before writing anything: Substring performs the range checks,
        // so an invalid range fails without emitting a partial marker.
        var slice = value.Substring(startIndex, characterCount);

        output.Write("HtmlEncode[[");
        output.Write(slice);
        output.Write("]]");
    }

    /// <summary>
    /// Always returns false.
    /// </summary>
    public override bool WillEncode(int unicodeScalar)
    {
        return false;
    }

    /// <summary>
    /// Always returns -1.
    /// </summary>
    public override unsafe int FindFirstCharacterToEncode(char* text, int textLength)
    {
        return -1;
    }

    /// <summary>
    /// Writes nothing: sets <paramref name="numberOfCharactersWritten"/> to 0 and returns false.
    /// </summary>
    public override unsafe bool TryEncodeUnicodeScalar(int unicodeScalar, char* buffer, int bufferLength, out int numberOfCharactersWritten)
    {
        numberOfCharactersWritten = 0;
        return false;
    }
}
|
||||
}
|
||||
|
|
@ -0,0 +1,54 @@
|
|||
// Copyright (c) .NET Foundation. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System.IO;
|
||||
using System.Text.Encodings.Web;
|
||||
|
||||
namespace Microsoft.Extensions.WebEncoders.Testing
|
||||
{
|
||||
/// <summary>
/// Encoder used for unit testing. It does not perform real JavaScript encoding; it wraps the
/// input in the marker <c>JavaScriptEncode[[...]]</c> so tests can see that encoding was requested.
/// </summary>
public class JavaScriptTestEncoder : JavaScriptEncoder
{
    /// <summary>
    /// Always reports 1.
    /// </summary>
    public override int MaxOutputCharactersPerInputCharacter
    {
        get { return 1; }
    }

    /// <summary>
    /// Returns <c>JavaScriptEncode[[value]]</c>.
    /// </summary>
    /// <param name="value">The text to wrap.</param>
    /// <returns>The wrapped text.</returns>
    public override string Encode(string value)
    {
        return $"JavaScriptEncode[[{value}]]";
    }

    /// <summary>
    /// Writes <c>JavaScriptEncode[[</c>, the requested slice of <paramref name="value"/>, then <c>]]</c>.
    /// </summary>
    /// <param name="output">The writer that receives the wrapped text.</param>
    /// <param name="value">The source characters.</param>
    /// <param name="startIndex">Index of the first character to write.</param>
    /// <param name="characterCount">Number of characters to write.</param>
    /// <exception cref="System.ArgumentNullException"><paramref name="output"/> or <paramref name="value"/> is null.</exception>
    /// <exception cref="System.ArgumentOutOfRangeException">The requested slice lies outside <paramref name="value"/>.</exception>
    public override void Encode(TextWriter output, char[] value, int startIndex, int characterCount)
    {
        // Validate everything up front so a bad call never leaves a dangling prefix in the writer.
        if (output == null)
        {
            throw new System.ArgumentNullException(nameof(output));
        }
        if (value == null)
        {
            throw new System.ArgumentNullException(nameof(value));
        }
        if (startIndex < 0 || startIndex > value.Length)
        {
            throw new System.ArgumentOutOfRangeException(nameof(startIndex));
        }
        if (characterCount < 0 || characterCount > value.Length - startIndex)
        {
            throw new System.ArgumentOutOfRangeException(nameof(characterCount));
        }

        output.Write("JavaScriptEncode[[");
        output.Write(value, startIndex, characterCount);
        output.Write("]]");
    }

    /// <summary>
    /// Writes <c>JavaScriptEncode[[</c>, the requested substring of <paramref name="value"/>, then <c>]]</c>.
    /// </summary>
    /// <param name="output">The writer that receives the wrapped text.</param>
    /// <param name="value">The source string.</param>
    /// <param name="startIndex">Index of the first character to write.</param>
    /// <param name="characterCount">Number of characters to write.</param>
    /// <exception cref="System.ArgumentNullException"><paramref name="output"/> or <paramref name="value"/> is null.</exception>
    /// <exception cref="System.ArgumentOutOfRangeException">The requested substring lies outside <paramref name="value"/>.</exception>
    public override void Encode(TextWriter output, string value, int startIndex, int characterCount)
    {
        if (output == null)
        {
            throw new System.ArgumentNullException(nameof(output));
        }
        if (value == null)
        {
            throw new System.ArgumentNullException(nameof(value));
        }

        // Take the substring before writing anything: Substring performs the range checks,
        // so an invalid range fails without emitting a partial marker.
        var slice = value.Substring(startIndex, characterCount);

        output.Write("JavaScriptEncode[[");
        output.Write(slice);
        output.Write("]]");
    }

    /// <summary>
    /// Always returns false.
    /// </summary>
    public override bool WillEncode(int unicodeScalar)
    {
        return false;
    }

    /// <summary>
    /// Always returns -1.
    /// </summary>
    public override unsafe int FindFirstCharacterToEncode(char* text, int textLength)
    {
        return -1;
    }

    /// <summary>
    /// Writes nothing: sets <paramref name="numberOfCharactersWritten"/> to 0 and returns false.
    /// </summary>
    public override unsafe bool TryEncodeUnicodeScalar(int unicodeScalar, char* buffer, int bufferLength, out int numberOfCharactersWritten)
    {
        numberOfCharactersWritten = 0;
        return false;
    }
}
|
||||
}
|
||||
|
|
@ -0,0 +1,54 @@
|
|||
// Copyright (c) .NET Foundation. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System.IO;
|
||||
using System.Text.Encodings.Web;
|
||||
|
||||
namespace Microsoft.Extensions.WebEncoders.Testing
|
||||
{
|
||||
/// <summary>
/// Encoder used for unit testing. It does not perform real URL encoding; it wraps the
/// input in the marker <c>UrlEncode[[...]]</c> so tests can see that encoding was requested.
/// </summary>
public class UrlTestEncoder : UrlEncoder
{
    /// <summary>
    /// Always reports 1.
    /// </summary>
    public override int MaxOutputCharactersPerInputCharacter
    {
        get { return 1; }
    }

    /// <summary>
    /// Returns <c>UrlEncode[[value]]</c>.
    /// </summary>
    /// <param name="value">The text to wrap.</param>
    /// <returns>The wrapped text.</returns>
    public override string Encode(string value)
    {
        return $"UrlEncode[[{value}]]";
    }

    /// <summary>
    /// Writes <c>UrlEncode[[</c>, the requested slice of <paramref name="value"/>, then <c>]]</c>.
    /// </summary>
    /// <param name="output">The writer that receives the wrapped text.</param>
    /// <param name="value">The source characters.</param>
    /// <param name="startIndex">Index of the first character to write.</param>
    /// <param name="characterCount">Number of characters to write.</param>
    /// <exception cref="System.ArgumentNullException"><paramref name="output"/> or <paramref name="value"/> is null.</exception>
    /// <exception cref="System.ArgumentOutOfRangeException">The requested slice lies outside <paramref name="value"/>.</exception>
    public override void Encode(TextWriter output, char[] value, int startIndex, int characterCount)
    {
        // Validate everything up front so a bad call never leaves a dangling prefix in the writer.
        if (output == null)
        {
            throw new System.ArgumentNullException(nameof(output));
        }
        if (value == null)
        {
            throw new System.ArgumentNullException(nameof(value));
        }
        if (startIndex < 0 || startIndex > value.Length)
        {
            throw new System.ArgumentOutOfRangeException(nameof(startIndex));
        }
        if (characterCount < 0 || characterCount > value.Length - startIndex)
        {
            throw new System.ArgumentOutOfRangeException(nameof(characterCount));
        }

        output.Write("UrlEncode[[");
        output.Write(value, startIndex, characterCount);
        output.Write("]]");
    }

    /// <summary>
    /// Writes <c>UrlEncode[[</c>, the requested substring of <paramref name="value"/>, then <c>]]</c>.
    /// </summary>
    /// <param name="output">The writer that receives the wrapped text.</param>
    /// <param name="value">The source string.</param>
    /// <param name="startIndex">Index of the first character to write.</param>
    /// <param name="characterCount">Number of characters to write.</param>
    /// <exception cref="System.ArgumentNullException"><paramref name="output"/> or <paramref name="value"/> is null.</exception>
    /// <exception cref="System.ArgumentOutOfRangeException">The requested substring lies outside <paramref name="value"/>.</exception>
    public override void Encode(TextWriter output, string value, int startIndex, int characterCount)
    {
        if (output == null)
        {
            throw new System.ArgumentNullException(nameof(output));
        }
        if (value == null)
        {
            throw new System.ArgumentNullException(nameof(value));
        }

        // Take the substring before writing anything: Substring performs the range checks,
        // so an invalid range fails without emitting a partial marker.
        var slice = value.Substring(startIndex, characterCount);

        output.Write("UrlEncode[[");
        output.Write(slice);
        output.Write("]]");
    }

    /// <summary>
    /// Always returns false.
    /// </summary>
    public override bool WillEncode(int unicodeScalar)
    {
        return false;
    }

    /// <summary>
    /// Always returns -1.
    /// </summary>
    public override unsafe int FindFirstCharacterToEncode(char* text, int textLength)
    {
        return -1;
    }

    /// <summary>
    /// Writes nothing: sets <paramref name="numberOfCharactersWritten"/> to 0 and returns false.
    /// </summary>
    public override unsafe bool TryEncodeUnicodeScalar(int unicodeScalar, char* buffer, int bufferLength, out int numberOfCharactersWritten)
    {
        numberOfCharactersWritten = 0;
        return false;
    }
}
|
||||
}
|
||||
|
|
@ -1,12 +1,12 @@
|
|||
// Copyright (c) .NET Foundation. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System;
|
||||
using System.Text.Encodings.Web;
|
||||
|
||||
namespace Microsoft.Extensions.WebEncoders
|
||||
{
|
||||
/// <summary>
|
||||
/// Specifies options common to all three encoders (HtmlEncode, JavaScriptStringEncode, UrlEncode).
|
||||
/// Specifies options common to all three encoders (HtmlEncode, JavaScriptEncode, UrlEncode).
|
||||
/// </summary>
|
||||
public sealed class WebEncoderOptions
|
||||
{
|
||||
|
|
@ -16,6 +16,6 @@ namespace Microsoft.Extensions.WebEncoders
|
|||
/// <remarks>
|
||||
/// If this property is null, then the encoders will use their default allow lists.
|
||||
/// </remarks>
|
||||
public ICodePointFilter CodePointFilter { get; set; }
|
||||
public TextEncoderSettings TextEncoderSettings { get; set; }
|
||||
}
|
||||
}
|
||||
|
|
@ -6,15 +6,21 @@
|
|||
"url": "git://github.com/aspnet/httpabstractions"
|
||||
},
|
||||
"compilationOptions": {
|
||||
"warningsAsErrors": true
|
||||
"warningsAsErrors": true,
|
||||
"allowUnsafe": true
|
||||
},
|
||||
"dependencies": {
|
||||
"Microsoft.Extensions.DependencyInjection.Abstractions": "1.0.0-*",
|
||||
"Microsoft.Extensions.OptionsModel": "1.0.0-*",
|
||||
"Microsoft.Extensions.WebEncoders.Core": "1.0.0-*"
|
||||
"System.Text.Encodings.Web": "4.0.0-beta-*"
|
||||
},
|
||||
"frameworks": {
|
||||
"net451": {},
|
||||
"net451": {
|
||||
"frameworkAssemblies": {
|
||||
"System.IO": "",
|
||||
"System.Runtime": ""
|
||||
}
|
||||
},
|
||||
"dotnet5.4": {}
|
||||
}
|
||||
}
|
||||
|
|
@ -5,8 +5,9 @@ using System;
|
|||
using System.Collections.Generic;
|
||||
using System.Globalization;
|
||||
using System.IO;
|
||||
using System.Text.Encodings.Web;
|
||||
using Microsoft.AspNet.Testing;
|
||||
using Microsoft.Extensions.WebEncoders;
|
||||
using Microsoft.Extensions.WebEncoders.Testing;
|
||||
using Xunit;
|
||||
|
||||
namespace Microsoft.AspNet.Html.Abstractions.Test
|
||||
|
|
@ -345,7 +346,7 @@ namespace Microsoft.AspNet.Html.Abstractions.Test
|
|||
{
|
||||
using (var writer = new StringWriter())
|
||||
{
|
||||
content.WriteTo(writer, new CommonTestEncoder());
|
||||
content.WriteTo(writer, new HtmlTestEncoder());
|
||||
return writer.ToString();
|
||||
}
|
||||
}
|
||||
|
|
@ -378,7 +379,7 @@ namespace Microsoft.AspNet.Html.Abstractions.Test
|
|||
return this;
|
||||
}
|
||||
|
||||
public void WriteTo(TextWriter writer, IHtmlEncoder encoder)
|
||||
public void WriteTo(TextWriter writer, HtmlEncoder encoder)
|
||||
{
|
||||
foreach (var entry in Entries)
|
||||
{
|
||||
|
|
@ -396,7 +397,7 @@ namespace Microsoft.AspNet.Html.Abstractions.Test
|
|||
|
||||
public string Value { get; }
|
||||
|
||||
public void WriteTo(TextWriter writer, IHtmlEncoder encoder)
|
||||
public void WriteTo(TextWriter writer, HtmlEncoder encoder)
|
||||
{
|
||||
writer.Write(Value);
|
||||
}
|
||||
|
|
@ -411,9 +412,9 @@ namespace Microsoft.AspNet.Html.Abstractions.Test
|
|||
|
||||
public string Value { get; }
|
||||
|
||||
public void WriteTo(TextWriter writer, IHtmlEncoder encoder)
|
||||
public void WriteTo(TextWriter writer, HtmlEncoder encoder)
|
||||
{
|
||||
encoder.HtmlEncode(Value, writer);
|
||||
encoder.Encode(writer, Value);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -426,7 +427,7 @@ namespace Microsoft.AspNet.Html.Abstractions.Test
|
|||
|
||||
public string Value { get; }
|
||||
|
||||
public void WriteTo(TextWriter writer, IHtmlEncoder encoder)
|
||||
public void WriteTo(TextWriter writer, HtmlEncoder encoder)
|
||||
{
|
||||
throw new NotImplementedException();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,9 +2,9 @@
|
|||
"dependencies": {
|
||||
"Microsoft.AspNet.Html.Abstractions": "1.0.0-*",
|
||||
"Microsoft.AspNet.Testing": "1.0.0-*",
|
||||
"Microsoft.Extensions.WebEncoders": "1.0.0-*",
|
||||
"xunit.runner.aspnet": "2.0.0-aspnet-*"
|
||||
},
|
||||
"compile": [ "../Microsoft.Extensions.WebEncoders.Tests/CommonTestEncoder.cs" ],
|
||||
"commands": {
|
||||
"test": "xunit.runner.aspnet"
|
||||
},
|
||||
|
|
|
|||
|
|
@ -56,7 +56,6 @@ namespace Microsoft.AspNet.Http.Abstractions
|
|||
[InlineData("name", "", "?name=")]
|
||||
[InlineData("", "value", "?=value")]
|
||||
[InlineData("", "", "?=")]
|
||||
[InlineData(null, null, "?=")]
|
||||
public void CreateNameValue_Success(string name, string value, string exepcted)
|
||||
{
|
||||
var query = QueryString.Create(name, value);
|
||||
|
|
@ -94,7 +93,6 @@ namespace Microsoft.AspNet.Http.Abstractions
|
|||
}
|
||||
|
||||
[Theory]
|
||||
[InlineData(null, null, null, "?=")]
|
||||
[InlineData("", "", "", "?=")]
|
||||
[InlineData("?", "", "", "?=")]
|
||||
[InlineData("?", "name2", "value2", "?name2=value2")]
|
||||
|
|
|
|||
|
|
@ -2,8 +2,9 @@
|
|||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System.IO;
|
||||
using System.Text.Encodings.Web;
|
||||
using Microsoft.AspNet.Html.Abstractions;
|
||||
using Microsoft.Extensions.WebEncoders;
|
||||
using Microsoft.Extensions.WebEncoders.Testing;
|
||||
using Xunit;
|
||||
|
||||
namespace Microsoft.Extensions.Internal
|
||||
|
|
@ -34,7 +35,7 @@ namespace Microsoft.Extensions.Internal
|
|||
var writer = new StringWriter();
|
||||
|
||||
// Act
|
||||
content.WriteTo(writer, new CommonTestEncoder());
|
||||
content.WriteTo(writer, new HtmlTestEncoder());
|
||||
|
||||
// Assert
|
||||
Assert.Equal("HtmlEncode[[Hello]]", writer.ToString());
|
||||
|
|
@ -50,7 +51,7 @@ namespace Microsoft.Extensions.Internal
|
|||
var writer = new StringWriter();
|
||||
|
||||
// Act
|
||||
content.WriteTo(writer, new CommonTestEncoder());
|
||||
content.WriteTo(writer, new HtmlTestEncoder());
|
||||
|
||||
// Assert
|
||||
Assert.Equal("Hello", writer.ToString());
|
||||
|
|
@ -69,7 +70,7 @@ namespace Microsoft.Extensions.Internal
|
|||
// Assert
|
||||
var result = Assert.Single(content.Entries);
|
||||
var testHtmlContent = Assert.IsType<TestHtmlContent>(result);
|
||||
testHtmlContent.WriteTo(writer, new CommonTestEncoder());
|
||||
testHtmlContent.WriteTo(writer, new HtmlTestEncoder());
|
||||
Assert.Equal("Written from TestHtmlContent: Hello", writer.ToString());
|
||||
}
|
||||
|
||||
|
|
@ -114,7 +115,7 @@ namespace Microsoft.Extensions.Internal
|
|||
content.Append("Test");
|
||||
|
||||
// Act
|
||||
content.WriteTo(writer, new CommonTestEncoder());
|
||||
content.WriteTo(writer, new HtmlTestEncoder());
|
||||
|
||||
// Assert
|
||||
Assert.Equal(2, content.Entries.Count);
|
||||
|
|
@ -130,7 +131,7 @@ namespace Microsoft.Extensions.Internal
|
|||
_content = content;
|
||||
}
|
||||
|
||||
public void WriteTo(TextWriter writer, IHtmlEncoder encoder)
|
||||
public void WriteTo(TextWriter writer, HtmlEncoder encoder)
|
||||
{
|
||||
writer.Write(ToString());
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,10 +4,13 @@
|
|||
},
|
||||
"dependencies": {
|
||||
"Microsoft.AspNet.Html.Abstractions": "1.0.0-*",
|
||||
"Microsoft.Extensions.BufferedHtmlContent.Sources": { "type": "build", "version": "1.0.0-*" },
|
||||
"Microsoft.Extensions.BufferedHtmlContent.Sources": {
|
||||
"type": "build",
|
||||
"version": "1.0.0-*"
|
||||
},
|
||||
"Microsoft.Extensions.WebEncoders.Tests" : "1.0.0-*",
|
||||
"xunit.runner.aspnet": "2.0.0-aspnet-*"
|
||||
},
|
||||
"compile": [ "../Microsoft.Extensions.WebEncoders.Tests/CommonTestEncoder.cs" ],
|
||||
"commands": {
|
||||
"test": "xunit.runner.aspnet"
|
||||
},
|
||||
|
|
|
|||
|
|
@ -1,125 +0,0 @@
|
|||
// Copyright (c) .NET Foundation. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System;
|
||||
using Xunit;
|
||||
|
||||
namespace Microsoft.Extensions.WebEncoders
|
||||
{
|
||||
/// <summary>
/// Tests for <c>AllowedCharsBitmap</c>: creation, allow/forbid, clear, clone and
/// removal of undefined characters.
/// </summary>
public class AllowedCharsBitmapTests
{
    /// <summary>
    /// A bitmap from <c>CreateNew</c> allows no character.
    /// </summary>
    [Fact]
    public void Ctor_EmptyByDefault()
    {
        // Act
        var bitmap = AllowedCharsBitmap.CreateNew();

        // Assert
        for (int i = 0; i <= Char.MaxValue; i++)
        {
            Assert.False(bitmap.IsCharacterAllowed((char)i));
        }
    }

    /// <summary>
    /// Overlapping allow / forbid / allow passes leave the bitmap in the state of the last write per character.
    /// </summary>
    [Fact]
    public void Allow_Forbid_ZigZag()
    {
        // Arrange
        var bitmap = AllowedCharsBitmap.CreateNew();

        // Act
        // The only chars which are allowed are those whose code points are multiples of 3 or 7
        // who aren't also multiples of 5. Exception: multiples of 35 are allowed.
        for (int i = 0; i <= Char.MaxValue; i += 3)
        {
            bitmap.AllowCharacter((char)i);
        }
        for (int i = 0; i <= Char.MaxValue; i += 5)
        {
            bitmap.ForbidCharacter((char)i);
        }
        for (int i = 0; i <= Char.MaxValue; i += 7)
        {
            bitmap.AllowCharacter((char)i);
        }

        // Assert
        for (int i = 0; i <= Char.MaxValue; i++)
        {
            // Replay the three passes in order; the last applicable one wins.
            bool isAllowed = false;
            if (i % 3 == 0) { isAllowed = true; }
            if (i % 5 == 0) { isAllowed = false; }
            if (i % 7 == 0) { isAllowed = true; }
            Assert.Equal(isAllowed, bitmap.IsCharacterAllowed((char)i));
        }
    }

    /// <summary>
    /// <c>Clear</c> forbids every character, including U+0000.
    /// </summary>
    [Fact]
    public void Clear_ForbidsEverything()
    {
        // Arrange
        // Start at 0 so that U+0000 is allowed before Clear; otherwise its
        // post-clear assertion would pass without Clear having done anything.
        var bitmap = AllowedCharsBitmap.CreateNew();
        for (int i = 0; i <= Char.MaxValue; i++)
        {
            bitmap.AllowCharacter((char)i);
        }

        // Act
        bitmap.Clear();

        // Assert
        for (int i = 0; i <= Char.MaxValue; i++)
        {
            Assert.False(bitmap.IsCharacterAllowed((char)i));
        }
    }

    /// <summary>
    /// Changes made to a clone do not affect the bitmap it was cloned from.
    /// </summary>
    [Fact]
    public void Clone_MakesDeepCopy()
    {
        // Arrange
        var originalBitmap = AllowedCharsBitmap.CreateNew();
        originalBitmap.AllowCharacter('x');

        // Act
        var clonedBitmap = originalBitmap.Clone();
        clonedBitmap.AllowCharacter('y');

        // Assert
        Assert.True(originalBitmap.IsCharacterAllowed('x'));
        Assert.False(originalBitmap.IsCharacterAllowed('y'));
        Assert.True(clonedBitmap.IsCharacterAllowed('x'));
        Assert.True(clonedBitmap.IsCharacterAllowed('y'));
    }

    /// <summary>
    /// <c>ForbidUndefinedCharacters</c> removes undefined characters without allowing anything new.
    /// </summary>
    [Fact]
    public void ForbidUndefinedCharacters_RemovesUndefinedChars()
    {
        // Arrange
        // We only allow odd-numbered characters in this test so that
        // we can validate that we properly merged the two bitmaps together
        // rather than simply overwriting the target.
        var bitmap = AllowedCharsBitmap.CreateNew();
        for (int i = 1; i <= Char.MaxValue; i += 2)
        {
            bitmap.AllowCharacter((char)i);
        }

        // Act
        bitmap.ForbidUndefinedCharacters();

        // Assert
        for (int i = 0; i <= Char.MaxValue; i++)
        {
            if (i % 2 == 0)
            {
                Assert.False(bitmap.IsCharacterAllowed((char)i)); // these chars were never allowed in the original description
            }
            else
            {
                Assert.Equal(UnicodeHelpers.IsCharacterDefined((char)i), bitmap.IsCharacterAllowed((char)i));
            }
        }
    }
}
|
||||
}
|
||||
|
|
@ -1,365 +0,0 @@
|
|||
// Copyright (c) .NET Foundation. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using Xunit;
|
||||
|
||||
namespace Microsoft.Extensions.WebEncoders
|
||||
{
|
||||
/// <summary>
/// Tests for <c>CodePointFilter</c>: constructors, the fluent allow/forbid methods,
/// <c>Clear</c> and <c>GetAllowedCodePoints</c>.
/// </summary>
public class CodePointFilterTests
{
    /// <summary>
    /// A parameterless filter allows no character.
    /// </summary>
    [Fact]
    public void Ctor_Parameterless_CreatesEmptyFilter()
    {
        // Act
        var filter = new CodePointFilter();

        // Assert
        AssertAllowed(filter, 0, Char.MaxValue, false);
    }

    /// <summary>
    /// Constructing from an arbitrary <c>ICodePointFilter</c> copies its allowed code points.
    /// </summary>
    [Fact]
    public void Ctor_OtherCodePointFilterAsInterface()
    {
        // Arrange
        var originalFilter = new OddCodePointFilter();

        // Act
        var newFilter = new CodePointFilter(originalFilter);

        // Assert
        AssertOnlyOddAllowed(newFilter, 0, Char.MaxValue);
    }

    /// <summary>
    /// Constructing from another <c>CodePointFilter</c> produces an independent copy.
    /// </summary>
    [Fact]
    public void Ctor_OtherCodePointFilterAsConcreteType_Clones()
    {
        // Arrange
        var originalFilter = new CodePointFilter().AllowChar('x');

        // Act
        var newFilter = new CodePointFilter(originalFilter).AllowChar('y');

        // Assert
        Assert.True(originalFilter.IsCharacterAllowed('x'));
        Assert.False(originalFilter.IsCharacterAllowed('y'));
        Assert.True(newFilter.IsCharacterAllowed('x'));
        Assert.True(newFilter.IsCharacterAllowed('y'));
    }

    /// <summary>
    /// Constructing from Unicode ranges allows exactly those ranges.
    /// </summary>
    [Fact]
    public void Ctor_UnicodeRanges()
    {
        // Act
        var filter = new CodePointFilter(UnicodeRanges.LatinExtendedA, UnicodeRanges.LatinExtendedC);

        // Assert
        AssertAllowed(filter, 0x0000, 0x00FF, false);
        AssertAllowed(filter, 0x0100, 0x017F, true);
        AssertAllowed(filter, 0x0180, 0x2C5F, false);
        AssertAllowed(filter, 0x2C60, 0x2C7F, true);
        AssertAllowed(filter, 0x2C80, Char.MaxValue, false);
    }

    /// <summary>
    /// <c>AllowChar</c> allows exactly the given character and returns the same instance.
    /// </summary>
    [Fact]
    public void AllowChar()
    {
        // Arrange
        var filter = new CodePointFilter();

        // Act
        var retVal = filter.AllowChar('\u0100');

        // Assert
        Assert.Same(filter, retVal); // returns 'this' instance
        Assert.True(filter.IsCharacterAllowed('\u0100'));
        Assert.False(filter.IsCharacterAllowed('\u0101'));
    }

    /// <summary>
    /// <c>AllowChars</c> (params form) allows exactly the given characters.
    /// </summary>
    [Fact]
    public void AllowChars_Array()
    {
        // Arrange
        var filter = new CodePointFilter();

        // Act
        var retVal = filter.AllowChars('\u0100', '\u0102');

        // Assert
        Assert.Same(filter, retVal); // returns 'this' instance
        Assert.True(filter.IsCharacterAllowed('\u0100'));
        Assert.False(filter.IsCharacterAllowed('\u0101'));
        Assert.True(filter.IsCharacterAllowed('\u0102'));
        Assert.False(filter.IsCharacterAllowed('\u0103'));
    }

    /// <summary>
    /// <c>AllowChars</c> (string form) allows exactly the characters of the string.
    /// </summary>
    [Fact]
    public void AllowChars_String()
    {
        // Arrange
        var filter = new CodePointFilter();

        // Act
        var retVal = filter.AllowChars("\u0100\u0102");

        // Assert
        Assert.Same(filter, retVal); // returns 'this' instance
        Assert.True(filter.IsCharacterAllowed('\u0100'));
        Assert.False(filter.IsCharacterAllowed('\u0101'));
        Assert.True(filter.IsCharacterAllowed('\u0102'));
        Assert.False(filter.IsCharacterAllowed('\u0103'));
    }

    /// <summary>
    /// <c>AllowFilter</c> adds another filter's code points to those already allowed.
    /// </summary>
    [Fact]
    public void AllowFilter()
    {
        // Arrange
        var filter = new CodePointFilter(UnicodeRanges.BasicLatin);

        // Act
        var retVal = filter.AllowFilter(new OddCodePointFilter());

        // Assert
        Assert.Same(filter, retVal); // returns 'this' instance
        AssertAllowed(filter, 0x0000, 0x007F, true);
        AssertOnlyOddAllowed(filter, 0x0080, Char.MaxValue);
    }

    /// <summary>
    /// <c>AllowRange</c> allows exactly the given Unicode range.
    /// </summary>
    [Fact]
    public void AllowRange()
    {
        // Arrange
        var filter = new CodePointFilter();

        // Act
        var retVal = filter.AllowRange(UnicodeRanges.LatinExtendedA);

        // Assert
        Assert.Same(filter, retVal); // returns 'this' instance
        AssertAllowed(filter, 0x0000, 0x00FF, false);
        AssertAllowed(filter, 0x0100, 0x017F, true);
        AssertAllowed(filter, 0x0180, Char.MaxValue, false);
    }

    /// <summary>
    /// <c>AllowRanges</c> allows exactly the given Unicode ranges.
    /// </summary>
    [Fact]
    public void AllowRanges()
    {
        // Arrange
        var filter = new CodePointFilter();

        // Act
        var retVal = filter.AllowRanges(UnicodeRanges.LatinExtendedA, UnicodeRanges.LatinExtendedC);

        // Assert
        Assert.Same(filter, retVal); // returns 'this' instance
        AssertAllowed(filter, 0x0000, 0x00FF, false);
        AssertAllowed(filter, 0x0100, 0x017F, true);
        AssertAllowed(filter, 0x0180, 0x2C5F, false);
        AssertAllowed(filter, 0x2C60, 0x2C7F, true);
        AssertAllowed(filter, 0x2C80, Char.MaxValue, false);
    }

    /// <summary>
    /// <c>Clear</c> forbids every character, including U+0000, and returns the same instance.
    /// </summary>
    [Fact]
    public void Clear()
    {
        // Arrange
        // Start at 0 so that U+0000 is allowed before Clear; otherwise its
        // post-clear assertion would pass without Clear having done anything.
        var filter = new CodePointFilter();
        for (int i = 0; i <= Char.MaxValue; i++)
        {
            filter.AllowChar((char)i);
        }

        // Act
        var retVal = filter.Clear();

        // Assert
        Assert.Same(filter, retVal); // returns 'this' instance
        AssertAllowed(filter, 0, Char.MaxValue, false);
    }

    /// <summary>
    /// <c>ForbidChar</c> forbids only the given character.
    /// </summary>
    [Fact]
    public void ForbidChar()
    {
        // Arrange
        var filter = new CodePointFilter(UnicodeRanges.BasicLatin);

        // Act
        var retVal = filter.ForbidChar('x');

        // Assert
        Assert.Same(filter, retVal); // returns 'this' instance
        Assert.True(filter.IsCharacterAllowed('w'));
        Assert.False(filter.IsCharacterAllowed('x'));
        Assert.True(filter.IsCharacterAllowed('y'));
        Assert.True(filter.IsCharacterAllowed('z'));
    }

    /// <summary>
    /// <c>ForbidChars</c> (params form) forbids only the given characters.
    /// </summary>
    [Fact]
    public void ForbidChars_Array()
    {
        // Arrange
        var filter = new CodePointFilter(UnicodeRanges.BasicLatin);

        // Act
        var retVal = filter.ForbidChars('x', 'z');

        // Assert
        Assert.Same(filter, retVal); // returns 'this' instance
        Assert.True(filter.IsCharacterAllowed('w'));
        Assert.False(filter.IsCharacterAllowed('x'));
        Assert.True(filter.IsCharacterAllowed('y'));
        Assert.False(filter.IsCharacterAllowed('z'));
    }

    /// <summary>
    /// <c>ForbidChars</c> (string form) forbids only the characters of the string.
    /// </summary>
    [Fact]
    public void ForbidChars_String()
    {
        // Arrange
        var filter = new CodePointFilter(UnicodeRanges.BasicLatin);

        // Act
        var retVal = filter.ForbidChars("xz");

        // Assert
        Assert.Same(filter, retVal); // returns 'this' instance
        Assert.True(filter.IsCharacterAllowed('w'));
        Assert.False(filter.IsCharacterAllowed('x'));
        Assert.True(filter.IsCharacterAllowed('y'));
        Assert.False(filter.IsCharacterAllowed('z'));
    }

    /// <summary>
    /// <c>ForbidRange</c> forbids the given range and leaves everything else untouched.
    /// </summary>
    [Fact]
    public void ForbidRange()
    {
        // Arrange
        var filter = new CodePointFilter(new OddCodePointFilter());

        // Act
        var retVal = filter.ForbidRange(UnicodeRanges.Specials);

        // Assert
        Assert.Same(filter, retVal); // returns 'this' instance
        AssertOnlyOddAllowed(filter, 0x0000, 0xFFEF);
        AssertAllowed(filter, 0xFFF0, Char.MaxValue, false);
    }

    /// <summary>
    /// <c>ForbidRanges</c> forbids the given ranges and leaves everything else untouched.
    /// </summary>
    [Fact]
    public void ForbidRanges()
    {
        // Arrange
        var filter = new CodePointFilter(new OddCodePointFilter());

        // Act
        var retVal = filter.ForbidRanges(UnicodeRanges.BasicLatin, UnicodeRanges.Specials);

        // Assert
        Assert.Same(filter, retVal); // returns 'this' instance
        AssertAllowed(filter, 0x0000, 0x007F, false);
        AssertOnlyOddAllowed(filter, 0x0080, 0xFFEF);
        AssertAllowed(filter, 0xFFF0, Char.MaxValue, false);
    }

    /// <summary>
    /// <c>GetAllowedCodePoints</c> returns every allowed code point and nothing else.
    /// </summary>
    [Fact]
    public void GetAllowedCodePoints()
    {
        // Arrange
        var expected = Enumerable.Range(UnicodeRanges.BasicLatin.FirstCodePoint, UnicodeRanges.BasicLatin.RangeSize)
            .Concat(Enumerable.Range(UnicodeRanges.Specials.FirstCodePoint, UnicodeRanges.Specials.RangeSize))
            .Except(new int[] { 'x' })
            .OrderBy(i => i)
            .ToArray();

        var filter = new CodePointFilter(UnicodeRanges.BasicLatin, UnicodeRanges.Specials);
        filter.ForbidChar('x');

        // Act
        var retVal = filter.GetAllowedCodePoints().OrderBy(i => i).ToArray();

        // Assert
        Assert.Equal<int>(expected, retVal);
    }

    // Asserts that every character in [first, last] (inclusive) has the expected allowed state.
    private static void AssertAllowed(CodePointFilter filter, int first, int last, bool expected)
    {
        for (int i = first; i <= last; i++)
        {
            Assert.Equal(expected, filter.IsCharacterAllowed((char)i));
        }
    }

    // Asserts that, within [first, last] (inclusive), exactly the odd code points are allowed.
    private static void AssertOnlyOddAllowed(CodePointFilter filter, int first, int last)
    {
        for (int i = first; i <= last; i++)
        {
            Assert.Equal((i % 2) == 1, filter.IsCharacterAllowed((char)i));
        }
    }

    // a code point filter which allows only odd code points through
    private sealed class OddCodePointFilter : ICodePointFilter
    {
        public IEnumerable<int> GetAllowedCodePoints()
        {
            for (int i = 1; i <= Char.MaxValue; i += 2)
            {
                yield return i;
            }
        }
    }
}
|
||||
}
|
||||
|
|
@ -1,103 +0,0 @@
|
|||
// Copyright (c) .NET Foundation. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System;
|
||||
using System.Globalization;
|
||||
using System.IO;
|
||||
using System.Runtime.CompilerServices;
|
||||
|
||||
namespace Microsoft.Extensions.WebEncoders
|
||||
{
|
||||
/// <summary>
/// Fake encoder for unit tests. Nothing is escaped: every method wraps its input as
/// "MethodName[[input]]", so a test can see which encoder method produced the text.
/// </summary>
internal sealed class CommonTestEncoder : IHtmlEncoder, IJavaScriptStringEncoder, IUrlEncoder
{
    /// <summary>
    /// Returns "HtmlEncode[[value]]".
    /// </summary>
    public string HtmlEncode(string value)
    {
        return Tag(value);
    }

    /// <summary>
    /// Writes "HtmlEncode[[segment]]", where segment is the requested slice of <paramref name="value"/>.
    /// </summary>
    public void HtmlEncode(string value, int startIndex, int charCount, TextWriter output)
    {
        WriteTagged(value.Substring(startIndex, charCount), output);
    }

    /// <summary>
    /// Writes "HtmlEncode[[segment]]", where segment is the requested slice of <paramref name="value"/>.
    /// </summary>
    public void HtmlEncode(char[] value, int startIndex, int charCount, TextWriter output)
    {
        WriteTagged(new string(value, startIndex, charCount), output);
    }

    /// <summary>
    /// Returns "JavaScriptStringEncode[[value]]".
    /// </summary>
    public string JavaScriptStringEncode(string value)
    {
        return Tag(value);
    }

    /// <summary>
    /// Writes "JavaScriptStringEncode[[segment]]", where segment is the requested slice of <paramref name="value"/>.
    /// </summary>
    public void JavaScriptStringEncode(string value, int startIndex, int charCount, TextWriter output)
    {
        WriteTagged(value.Substring(startIndex, charCount), output);
    }

    /// <summary>
    /// Writes "JavaScriptStringEncode[[segment]]", where segment is the requested slice of <paramref name="value"/>.
    /// </summary>
    public void JavaScriptStringEncode(char[] value, int startIndex, int charCount, TextWriter output)
    {
        WriteTagged(new string(value, startIndex, charCount), output);
    }

    /// <summary>
    /// Returns "UrlEncode[[value]]".
    /// </summary>
    public string UrlEncode(string value)
    {
        return Tag(value);
    }

    /// <summary>
    /// Writes "UrlEncode[[segment]]", where segment is the requested slice of <paramref name="value"/>.
    /// </summary>
    public void UrlEncode(string value, int startIndex, int charCount, TextWriter output)
    {
        WriteTagged(value.Substring(startIndex, charCount), output);
    }

    /// <summary>
    /// Writes "UrlEncode[[segment]]", where segment is the requested slice of <paramref name="value"/>.
    /// </summary>
    public void UrlEncode(char[] value, int startIndex, int charCount, TextWriter output)
    {
        WriteTagged(new string(value, startIndex, charCount), output);
    }

    // Builds "encodeType[[value]]". The compiler fills encodeType with the name of the
    // calling member, so this must be called directly from the public method being tagged
    // (or be handed the name explicitly).
    private static string Tag(string value, [CallerMemberName] string encodeType = null)
    {
        return encodeType + "[[" + value + "]]";
    }

    // Tags an already-extracted segment and writes it to the output. As with Tag, the
    // caller's member name is captured here and forwarded explicitly.
    private static void WriteTagged(string segment, TextWriter output, [CallerMemberName] string encodeType = null)
    {
        string tagged = Tag(segment, encodeType);
        output.Write(tagged);
    }
}
|
||||
}
|
||||
|
|
@ -1,21 +0,0 @@
|
|||
// Copyright (c) .NET Foundation. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System;
|
||||
using Xunit;
|
||||
|
||||
namespace Microsoft.Extensions.WebEncoders
|
||||
{
|
||||
// Checks the output-buffer capacity that EncoderCommon.GetCapacityOfOutputStringBuilder
// suggests for a given input length and worst-case expansion factor.
public class EncoderCommonTests
{
    [Theory]
    [InlineData(10000, 3, 16 * 1024)] // result is capped at 16k chars
    [InlineData(5000, 3, 15000)] // still below the 16k cap
    [InlineData(40000, 3, 40000)] // if we spill over the LOH, the output buffer is still as long as the input buffer
    [InlineData(512, Int32.MaxValue, 16 * 1024)] // numeric overflow must be handled
    public void GetCapacityOfOutputStringBuilder(int numCharsToEncode, int worstCaseOutputCharsPerInputChar, int expectedResult)
    {
        // Act
        int actualCapacity = EncoderCommon.GetCapacityOfOutputStringBuilder(
            numCharsToEncode,
            worstCaseOutputCharsPerInputChar);

        // Assert
        Assert.Equal(expectedResult, actualCapacity);
    }
}
|
||||
}
|
||||
|
|
@ -1,72 +0,0 @@
|
|||
// Copyright (c) .NET Foundation. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System;
|
||||
using System.IO;
|
||||
using Xunit;
|
||||
|
||||
namespace Microsoft.Extensions.WebEncoders
|
||||
{
|
||||
// Tests for the EncoderExtensions helpers that encode a whole string into a TextWriter.
// Each encoder gets a null-encoder parameter check and one positive case where '+' is
// the only character that has to be escaped.
public class EncoderExtensionsTests
{
    [Fact]
    public void HtmlEncode_ParameterChecks()
    {
        Assert.Throws<ArgumentNullException>(() => EncoderExtensions.HtmlEncode(null, "Hello!", new StringWriter()));
    }

    [Fact]
    public void HtmlEncode_PositiveTestCase()
    {
        // Arrange
        IHtmlEncoder encoder = new HtmlEncoder(UnicodeRanges.All);
        StringWriter writer = new StringWriter();

        // Act
        encoder.HtmlEncode("Hello+there!", writer);

        // Assert
        // '+' is written as a hexadecimal character reference; the literal must contain
        // the escaped form, not a bare '+'.
        Assert.Equal("Hello&#x2B;there!", writer.ToString());
    }

    [Fact]
    public void JavaScriptStringEncode_ParameterChecks()
    {
        Assert.Throws<ArgumentNullException>(() => EncoderExtensions.JavaScriptStringEncode(null, "Hello!", new StringWriter()));
    }

    [Fact]
    public void JavaScriptStringEncode_PositiveTestCase()
    {
        // Arrange
        IJavaScriptStringEncoder encoder = new JavaScriptStringEncoder(UnicodeRanges.All);
        StringWriter writer = new StringWriter();

        // Act
        encoder.JavaScriptStringEncode("Hello+there!", writer);

        // Assert
        Assert.Equal(@"Hello\u002Bthere!", writer.ToString());
    }

    [Fact]
    public void UrlEncode_ParameterChecks()
    {
        Assert.Throws<ArgumentNullException>(() => EncoderExtensions.UrlEncode(null, "Hello!", new StringWriter()));
    }

    [Fact]
    public void UrlEncode_PositiveTestCase()
    {
        // Arrange
        IUrlEncoder encoder = new UrlEncoder(UnicodeRanges.All);
        StringWriter writer = new StringWriter();

        // Act
        encoder.UrlEncode("Hello+there!", writer);

        // Assert
        Assert.Equal("Hello%2Bthere!", writer.ToString());
    }
}
|
||||
}
|
||||
|
|
@ -1,7 +1,9 @@
|
|||
// Copyright (c) .NET Foundation. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System.Text.Encodings.Web;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.WebEncoders.Testing;
|
||||
using Xunit;
|
||||
|
||||
namespace Microsoft.Extensions.WebEncoders
|
||||
|
|
@ -19,12 +21,12 @@ namespace Microsoft.Extensions.WebEncoders
|
|||
|
||||
// Assert
|
||||
var serviceProvider = serviceCollection.BuildServiceProvider();
|
||||
Assert.Same(HtmlEncoder.Default, serviceProvider.GetRequiredService<IHtmlEncoder>()); // default encoder
|
||||
Assert.Same(HtmlEncoder.Default, serviceProvider.GetRequiredService<IHtmlEncoder>()); // as singleton instance
|
||||
Assert.Same(JavaScriptStringEncoder.Default, serviceProvider.GetRequiredService<IJavaScriptStringEncoder>()); // default encoder
|
||||
Assert.Same(JavaScriptStringEncoder.Default, serviceProvider.GetRequiredService<IJavaScriptStringEncoder>()); // as singleton instance
|
||||
Assert.Same(UrlEncoder.Default, serviceProvider.GetRequiredService<IUrlEncoder>()); // default encoder
|
||||
Assert.Same(UrlEncoder.Default, serviceProvider.GetRequiredService<IUrlEncoder>()); // as singleton instance
|
||||
Assert.Same(HtmlEncoder.Default, serviceProvider.GetRequiredService<HtmlEncoder>()); // default encoder
|
||||
Assert.Same(HtmlEncoder.Default, serviceProvider.GetRequiredService<HtmlEncoder>()); // as singleton instance
|
||||
Assert.Same(JavaScriptEncoder.Default, serviceProvider.GetRequiredService<JavaScriptEncoder>()); // default encoder
|
||||
Assert.Same(JavaScriptEncoder.Default, serviceProvider.GetRequiredService<JavaScriptEncoder>()); // as singleton instance
|
||||
Assert.Same(UrlEncoder.Default, serviceProvider.GetRequiredService<UrlEncoder>()); // default encoder
|
||||
Assert.Same(UrlEncoder.Default, serviceProvider.GetRequiredService<UrlEncoder>()); // as singleton instance
|
||||
}
|
||||
|
||||
[Fact]
|
||||
|
|
@ -36,23 +38,24 @@ namespace Microsoft.Extensions.WebEncoders
|
|||
// Act
|
||||
serviceCollection.AddWebEncoders(options =>
|
||||
{
|
||||
options.CodePointFilter = new CodePointFilter().AllowChars("ace"); // only these three chars are allowed
|
||||
options.TextEncoderSettings = new TextEncoderSettings();
|
||||
options.TextEncoderSettings.AllowCharacters("ace".ToCharArray()); // only these three chars are allowed
|
||||
});
|
||||
|
||||
// Assert
|
||||
var serviceProvider = serviceCollection.BuildServiceProvider();
|
||||
|
||||
var htmlEncoder = serviceProvider.GetRequiredService<IHtmlEncoder>();
|
||||
Assert.Equal("abcde", htmlEncoder.HtmlEncode("abcde"));
|
||||
Assert.Same(htmlEncoder, serviceProvider.GetRequiredService<IHtmlEncoder>()); // as singleton instance
|
||||
var htmlEncoder = serviceProvider.GetRequiredService<HtmlEncoder>();
|
||||
Assert.Equal("abcde", htmlEncoder.Encode("abcde"));
|
||||
Assert.Same(htmlEncoder, serviceProvider.GetRequiredService<HtmlEncoder>()); // as singleton instance
|
||||
|
||||
var javaScriptStringEncoder = serviceProvider.GetRequiredService<IJavaScriptStringEncoder>();
|
||||
Assert.Equal(@"a\u0062c\u0064e", javaScriptStringEncoder.JavaScriptStringEncode("abcde"));
|
||||
Assert.Same(javaScriptStringEncoder, serviceProvider.GetRequiredService<IJavaScriptStringEncoder>()); // as singleton instance
|
||||
var javaScriptEncoder = serviceProvider.GetRequiredService<JavaScriptEncoder>();
|
||||
Assert.Equal(@"a\u0062c\u0064e", javaScriptEncoder.Encode("abcde"));
|
||||
Assert.Same(javaScriptEncoder, serviceProvider.GetRequiredService<JavaScriptEncoder>()); // as singleton instance
|
||||
|
||||
var urlEncoder = serviceProvider.GetRequiredService<IUrlEncoder>();
|
||||
Assert.Equal("a%62c%64e", urlEncoder.UrlEncode("abcde"));
|
||||
Assert.Same(urlEncoder, serviceProvider.GetRequiredService<IUrlEncoder>()); // as singleton instance
|
||||
var urlEncoder = serviceProvider.GetRequiredService<UrlEncoder>();
|
||||
Assert.Equal("a%62c%64e", urlEncoder.Encode("abcde"));
|
||||
Assert.Same(urlEncoder, serviceProvider.GetRequiredService<UrlEncoder>()); // as singleton instance
|
||||
}
|
||||
|
||||
[Fact]
|
||||
|
|
@ -62,25 +65,26 @@ namespace Microsoft.Extensions.WebEncoders
|
|||
var serviceCollection = new ServiceCollection();
|
||||
|
||||
// Act
|
||||
serviceCollection.AddSingleton<IHtmlEncoder, CommonTestEncoder>();
|
||||
serviceCollection.AddSingleton<IJavaScriptStringEncoder, CommonTestEncoder>();
|
||||
serviceCollection.AddSingleton<HtmlEncoder, HtmlTestEncoder>();
|
||||
serviceCollection.AddSingleton<JavaScriptEncoder, JavaScriptTestEncoder>();
|
||||
// we don't register an existing URL encoder
|
||||
serviceCollection.AddWebEncoders(options =>
|
||||
{
|
||||
options.CodePointFilter = new CodePointFilter().AllowChars("ace"); // only these three chars are allowed
|
||||
options.TextEncoderSettings = new TextEncoderSettings();
|
||||
options.TextEncoderSettings.AllowCharacters("ace".ToCharArray()); // only these three chars are allowed
|
||||
});
|
||||
|
||||
// Assert
|
||||
var serviceProvider = serviceCollection.BuildServiceProvider();
|
||||
|
||||
var htmlEncoder = serviceProvider.GetHtmlEncoder();
|
||||
Assert.Equal("HtmlEncode[[abcde]]", htmlEncoder.HtmlEncode("abcde"));
|
||||
Assert.Equal("HtmlEncode[[abcde]]", htmlEncoder.Encode("abcde"));
|
||||
|
||||
var javaScriptStringEncoder = serviceProvider.GetJavaScriptStringEncoder();
|
||||
Assert.Equal("JavaScriptStringEncode[[abcde]]", javaScriptStringEncoder.JavaScriptStringEncode("abcde"));
|
||||
var javaScriptEncoder = serviceProvider.GetJavaScriptEncoder();
|
||||
Assert.Equal("JavaScriptEncode[[abcde]]", javaScriptEncoder.Encode("abcde"));
|
||||
|
||||
var urlEncoder = serviceProvider.GetUrlEncoder();
|
||||
Assert.Equal("a%62c%64e", urlEncoder.UrlEncode("abcde"));
|
||||
Assert.Equal("a%62c%64e", urlEncoder.Encode("abcde"));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System;
|
||||
using System.Text.Encodings.Web;
|
||||
using Xunit;
|
||||
|
||||
namespace Microsoft.Extensions.WebEncoders
|
||||
|
|
@ -25,7 +26,7 @@ namespace Microsoft.Extensions.WebEncoders
|
|||
public void GetHtmlEncoder_ServiceProviderHasEncoder_ReturnsRegisteredInstance()
|
||||
{
|
||||
// Arrange
|
||||
var expectedEncoder = new HtmlEncoder();
|
||||
var expectedEncoder = HtmlEncoder.Default;
|
||||
var serviceProvider = new TestServiceProvider() { Service = expectedEncoder };
|
||||
|
||||
// Act
|
||||
|
|
@ -36,27 +37,27 @@ namespace Microsoft.Extensions.WebEncoders
|
|||
}
|
||||
|
||||
[Fact]
|
||||
public void GetJavaScriptStringEncoder_ServiceProviderDoesNotHaveEncoder_UsesDefault()
|
||||
public void GetJavaScriptEncoder_ServiceProviderDoesNotHaveEncoder_UsesDefault()
|
||||
{
|
||||
// Arrange
|
||||
var serviceProvider = new TestServiceProvider();
|
||||
|
||||
// Act
|
||||
var retVal = serviceProvider.GetJavaScriptStringEncoder();
|
||||
var retVal = serviceProvider.GetJavaScriptEncoder();
|
||||
|
||||
// Assert
|
||||
Assert.Same(JavaScriptStringEncoder.Default, retVal);
|
||||
Assert.Same(JavaScriptEncoder.Default, retVal);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void GetJavaScriptStringEncoder_ServiceProviderHasEncoder_ReturnsRegisteredInstance()
|
||||
public void GetJavaScriptEncoder_ServiceProviderHasEncoder_ReturnsRegisteredInstance()
|
||||
{
|
||||
// Arrange
|
||||
var expectedEncoder = new JavaScriptStringEncoder();
|
||||
var expectedEncoder = JavaScriptEncoder.Default;
|
||||
var serviceProvider = new TestServiceProvider() { Service = expectedEncoder };
|
||||
|
||||
// Act
|
||||
var retVal = serviceProvider.GetJavaScriptStringEncoder();
|
||||
var retVal = serviceProvider.GetJavaScriptEncoder();
|
||||
|
||||
// Assert
|
||||
Assert.Same(expectedEncoder, retVal);
|
||||
|
|
@ -79,7 +80,7 @@ namespace Microsoft.Extensions.WebEncoders
|
|||
public void GetUrlEncoder_ServiceProviderHasEncoder_ReturnsRegisteredInstance()
|
||||
{
|
||||
// Arrange
|
||||
var expectedEncoder = new UrlEncoder();
|
||||
var expectedEncoder = UrlEncoder.Default;
|
||||
var serviceProvider = new TestServiceProvider() { Service = expectedEncoder };
|
||||
|
||||
// Act
|
||||
|
|
|
|||
|
|
@ -1,38 +0,0 @@
|
|||
// Copyright (c) .NET Foundation. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using Newtonsoft.Json;
|
||||
|
||||
namespace Microsoft.Extensions.WebEncoders
|
||||
{
|
||||
// Loads the HTML entity table used by the tests from "entities.json".
internal static class Entities
{
    // Populated once by the static field initializer; keys all have the form "&name;".
    public static readonly IDictionary<string, ParsedEntity> ParsedEntities = GetParsedEntities();

    // Deserializes entities.json and drops every key that does not both start with '&'
    // and end with ';'. The same dictionary instance that was deserialized is returned.
    private static IDictionary<string, ParsedEntity> GetParsedEntities()
    {
        var jsonReader = new JsonTextReader(new StringReader(ReadEntitiesJsonFile()));
        IDictionary<string, ParsedEntity> entities =
            new JsonSerializer().Deserialize<IDictionary<string, ParsedEntity>>(jsonReader);

        // Snapshot the rejected keys first: the dictionary cannot be modified while
        // its Keys collection is being enumerated.
        List<string> rejectedKeys = entities.Keys
            .Where(name => !(name.StartsWith("&", StringComparison.Ordinal) && name.EndsWith(";", StringComparison.Ordinal)))
            .ToList();

        foreach (string rejected in rejectedKeys)
        {
            entities.Remove(rejected);
        }

        return entities;
    }

    // Reads the raw JSON text; the path is relative to the current working directory.
    private static string ReadEntitiesJsonFile()
    {
        string json = File.ReadAllText("entities.json");
        return json;
    }
}
|
||||
}
|
||||
|
|
@ -1,27 +0,0 @@
|
|||
// Copyright (c) .NET Foundation. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
|
||||
namespace Microsoft.Extensions.WebEncoders
|
||||
{
|
||||
// Small TextReader helpers shared by the tests.
public static class Extensions
{
    // Reads lines from the reader until ReadLine() returns null and returns them in order.
    // An exhausted or empty reader produces an empty array.
    public static string[] ReadAllLines(this TextReader reader)
    {
        var collected = new List<string>();

        for (string current = reader.ReadLine(); current != null; current = reader.ReadLine())
        {
            collected.Add(current);
        }

        return collected.ToArray();
    }
}
|
||||
}
|
||||
|
|
@ -1,269 +0,0 @@
|
|||
// Copyright (c) .NET Foundation. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System;
|
||||
using System.Globalization;
|
||||
using System.IO;
|
||||
using Xunit;
|
||||
|
||||
namespace Microsoft.Extensions.WebEncoders
|
||||
{
|
||||
// Unit tests for HtmlEncoder. They pin which characters pass through unchanged and which
// are replaced by a named reference (&lt; &gt; &amp; &quot;) or by a hexadecimal
// reference of the form &#xH;. Expected values are written in their escaped form.
public class HtmlEncoderTests
{
    [Fact]
    public void Ctor_WithCodePointFilter()
    {
        // Arrange
        var filter = new CodePointFilter().AllowChars("ab").AllowChars('\0', '&', '\uFFFF', 'd');
        HtmlEncoder encoder = new HtmlEncoder(filter);

        // Act & assert
        Assert.Equal("a", encoder.HtmlEncode("a"));
        Assert.Equal("b", encoder.HtmlEncode("b"));
        Assert.Equal("&#x63;", encoder.HtmlEncode("c")); // 'c' was never added to the filter
        Assert.Equal("d", encoder.HtmlEncode("d"));
        Assert.Equal("&#x0;", encoder.HtmlEncode("\0")); // we still always encode control chars
        Assert.Equal("&amp;", encoder.HtmlEncode("&")); // we still always encode HTML-special chars
        Assert.Equal("&#xFFFF;", encoder.HtmlEncode("\uFFFF")); // we still always encode non-chars and other forbidden chars
    }

    [Fact]
    public void Ctor_WithUnicodeRanges()
    {
        // Arrange
        HtmlEncoder encoder = new HtmlEncoder(UnicodeRanges.Latin1Supplement, UnicodeRanges.MiscellaneousSymbols);

        // Act & assert
        Assert.Equal("&#x61;", encoder.HtmlEncode("a")); // Basic Latin was not among the allowed ranges
        Assert.Equal("\u00E9", encoder.HtmlEncode("\u00E9" /* LATIN SMALL LETTER E WITH ACUTE */));
        Assert.Equal("\u2601", encoder.HtmlEncode("\u2601" /* CLOUD */));
    }

    [Fact]
    public void Ctor_WithNoParameters_DefaultsToBasicLatin()
    {
        // Arrange
        HtmlEncoder encoder = new HtmlEncoder();

        // Act & assert
        Assert.Equal("a", encoder.HtmlEncode("a"));
        Assert.Equal("&#xE9;", encoder.HtmlEncode("\u00E9" /* LATIN SMALL LETTER E WITH ACUTE */));
        Assert.Equal("&#x2601;", encoder.HtmlEncode("\u2601" /* CLOUD */));
    }

    [Fact]
    public void Default_EquivalentToBasicLatin()
    {
        // Arrange
        HtmlEncoder controlEncoder = new HtmlEncoder(UnicodeRanges.BasicLatin);
        HtmlEncoder testEncoder = HtmlEncoder.Default;

        // Act & assert
        // Surrogate code units are skipped here; they are not compared by this test.
        for (int i = 0; i <= Char.MaxValue; i++)
        {
            if (!IsSurrogateCodePoint(i))
            {
                string input = new String((char)i, 1);
                Assert.Equal(controlEncoder.HtmlEncode(input), testEncoder.HtmlEncode(input));
            }
        }
    }

    [Fact]
    public void Default_ReturnsSingletonInstance()
    {
        // Act
        HtmlEncoder encoder1 = HtmlEncoder.Default;
        HtmlEncoder encoder2 = HtmlEncoder.Default;

        // Assert
        Assert.Same(encoder1, encoder2);
    }

    [Theory]
    [InlineData("<", "&lt;")]
    [InlineData(">", "&gt;")]
    [InlineData("&", "&amp;")]
    [InlineData("'", "&#x27;")]
    [InlineData("\"", "&quot;")]
    [InlineData("+", "&#x2B;")]
    public void HtmlEncode_AllRangesAllowed_StillEncodesForbiddenChars_Simple(string input, string expected)
    {
        // Arrange
        HtmlEncoder encoder = new HtmlEncoder(UnicodeRanges.All);

        // Act
        string retVal = encoder.HtmlEncode(input);

        // Assert
        Assert.Equal(expected, retVal);
    }

    [Fact]
    public void HtmlEncode_AllRangesAllowed_StillEncodesForbiddenChars_Extended()
    {
        // Arrange
        HtmlEncoder encoder = new HtmlEncoder(UnicodeRanges.All);

        // Act & assert - BMP chars
        for (int i = 0; i <= 0xFFFF; i++)
        {
            string input = new String((char)i, 1);
            string expected;
            if (IsSurrogateCodePoint(i))
            {
                expected = "\uFFFD"; // unpaired surrogate -> Unicode replacement char
            }
            else
            {
                // The four characters below have named references; everything else that
                // must be encoded uses the hexadecimal form.
                if (input == "<") { expected = "&lt;"; }
                else if (input == ">") { expected = "&gt;"; }
                else if (input == "&") { expected = "&amp;"; }
                else if (input == "\"") { expected = "&quot;"; }
                else
                {
                    bool mustEncode = false;
                    if (i == '\'' || i == '+')
                    {
                        mustEncode = true; // apostrophe, plus
                    }
                    else if (i <= 0x001F || (0x007F <= i && i <= 0x9F))
                    {
                        mustEncode = true; // control char
                    }
                    else if (!UnicodeHelpers.IsCharacterDefined((char)i))
                    {
                        mustEncode = true; // undefined (or otherwise disallowed) char
                    }

                    if (mustEncode)
                    {
                        expected = String.Format(CultureInfo.InvariantCulture, "&#x{0:X};", i);
                    }
                    else
                    {
                        expected = input; // no encoding
                    }
                }
            }

            string retVal = encoder.HtmlEncode(input);
            Assert.Equal(expected, retVal);
        }

        // Act & assert - astral chars
        // Every supplementary code point is expected as one hexadecimal reference.
        for (int i = 0x10000; i <= 0x10FFFF; i++)
        {
            string input = Char.ConvertFromUtf32(i);
            string expected = String.Format(CultureInfo.InvariantCulture, "&#x{0:X};", i);
            string retVal = encoder.HtmlEncode(input);
            Assert.Equal(expected, retVal);
        }
    }

    [Fact]
    public void HtmlEncode_BadSurrogates_ReturnsUnicodeReplacementChar()
    {
        // Arrange
        HtmlEncoder encoder = new HtmlEncoder(UnicodeRanges.All); // allow all codepoints

        // "a<unpaired leading>b<unpaired trailing>c<trailing before leading>d<unpaired trailing><valid>e<high at end of string>"
        // The one valid pair, D800 DFFF, is code point U+103FF and is expected as a single reference.
        const string input = "a\uD800b\uDFFFc\uDFFF\uD800d\uDFFF\uD800\uDFFFe\uD800";
        const string expected = "a\uFFFDb\uFFFDc\uFFFD\uFFFDd\uFFFD&#x103FF;e\uFFFD";

        // Act
        string retVal = encoder.HtmlEncode(input);

        // Assert
        Assert.Equal(expected, retVal);
    }

    [Fact]
    public void HtmlEncode_EmptyStringInput_ReturnsEmptyString()
    {
        // Arrange
        HtmlEncoder encoder = new HtmlEncoder();

        // Act & assert
        Assert.Equal("", encoder.HtmlEncode(""));
    }

    [Fact]
    public void HtmlEncode_InputDoesNotRequireEncoding_ReturnsOriginalStringInstance()
    {
        // Arrange
        HtmlEncoder encoder = new HtmlEncoder();
        string input = "Hello, there!";

        // Act & assert
        // Assert.Same: the very same string object must come back, not an equal copy.
        Assert.Same(input, encoder.HtmlEncode(input));
    }

    [Fact]
    public void HtmlEncode_NullInput_ReturnsNull()
    {
        // Arrange
        HtmlEncoder encoder = new HtmlEncoder();

        // Act & assert
        Assert.Null(encoder.HtmlEncode(null));
    }

    [Fact]
    public void HtmlEncode_WithCharsRequiringEncodingAtBeginning()
    {
        Assert.Equal("&amp;Hello, there!", new HtmlEncoder().HtmlEncode("&Hello, there!"));
    }

    [Fact]
    public void HtmlEncode_WithCharsRequiringEncodingAtEnd()
    {
        Assert.Equal("Hello, there!&amp;", new HtmlEncoder().HtmlEncode("Hello, there!&"));
    }

    [Fact]
    public void HtmlEncode_WithCharsRequiringEncodingInMiddle()
    {
        Assert.Equal("Hello, &amp;there!", new HtmlEncoder().HtmlEncode("Hello, &there!"));
    }

    [Fact]
    public void HtmlEncode_WithCharsRequiringEncodingInterspersed()
    {
        Assert.Equal("Hello, &lt;there&gt;!", new HtmlEncoder().HtmlEncode("Hello, <there>!"));
    }

    [Fact]
    public void HtmlEncode_CharArray()
    {
        // Arrange
        HtmlEncoder encoder = new HtmlEncoder();
        var output = new StringWriter();

        // Act
        // Only the five chars starting at index 3 ("lo+wo") are encoded.
        encoder.HtmlEncode("Hello+world!".ToCharArray(), 3, 5, output);

        // Assert
        Assert.Equal("lo&#x2B;wo", output.ToString());
    }

    [Fact]
    public void HtmlEncode_StringSubstring()
    {
        // Arrange
        HtmlEncoder encoder = new HtmlEncoder();
        var output = new StringWriter();

        // Act
        // Only the five chars starting at index 3 ("lo+wo") are encoded.
        encoder.HtmlEncode("Hello+world!", 3, 5, output);

        // Assert
        Assert.Equal("lo&#x2B;wo", output.ToString());
    }

    // True for values in the UTF-16 surrogate range 0xD800..0xDFFF (inclusive).
    private static bool IsSurrogateCodePoint(int codePoint)
    {
        return (0xD800 <= codePoint && codePoint <= 0xDFFF);
    }
}
|
||||
}
|
||||
|
|
@ -1,331 +0,0 @@
|
|||
// Copyright (c) .NET Foundation. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System;
|
||||
using System.Globalization;
|
||||
using System.IO;
|
||||
using Xunit;
|
||||
|
||||
namespace Microsoft.Extensions.WebEncoders
|
||||
{
|
||||
public class JavaScriptStringEncoderTests
|
||||
{
|
||||
[Fact]
public void Ctor_WithCodePointFilter()
{
    // Arrange: the filter lists 'a', 'b', 'd' together with '\0', '&' and U+FFFF.
    var allowed = new CodePointFilter().AllowChars("ab").AllowChars('\0', '&', '\uFFFF', 'd');
    var sut = new JavaScriptStringEncoder(allowed);

    // Act
    string a = sut.JavaScriptStringEncode("a");
    string b = sut.JavaScriptStringEncode("b");
    string c = sut.JavaScriptStringEncode("c");
    string d = sut.JavaScriptStringEncode("d");
    string nul = sut.JavaScriptStringEncode("\0");
    string ampersand = sut.JavaScriptStringEncode("&");
    string nonChar = sut.JavaScriptStringEncode("\uFFFF");

    // Assert
    Assert.Equal("a", a);
    Assert.Equal("b", b);
    Assert.Equal(@"\u0063", c); // 'c' was never added to the filter
    Assert.Equal("d", d);
    Assert.Equal(@"\u0000", nul); // control chars are encoded even when listed
    Assert.Equal(@"\u0026", ampersand); // HTML-special chars are encoded even when listed
    Assert.Equal(@"\uFFFF", nonChar); // non-chars and other forbidden chars are encoded even when listed
}
|
||||
|
||||
[Fact]
public void Ctor_WithUnicodeRanges()
{
    // Arrange: only Latin-1 Supplement and Miscellaneous Symbols are allowed.
    var sut = new JavaScriptStringEncoder(UnicodeRanges.Latin1Supplement, UnicodeRanges.MiscellaneousSymbols);

    // Act
    string basicLatin = sut.JavaScriptStringEncode("a");
    string latin1 = sut.JavaScriptStringEncode("\u00E9" /* LATIN SMALL LETTER E WITH ACUTE */);
    string symbol = sut.JavaScriptStringEncode("\u2601" /* CLOUD */);

    // Assert
    Assert.Equal(@"\u0061", basicLatin); // Basic Latin was not among the allowed ranges
    Assert.Equal("\u00E9", latin1);
    Assert.Equal("\u2601", symbol);
}
|
||||
|
||||
[Fact]
public void Ctor_WithNoParameters_DefaultsToBasicLatin()
{
    // Arrange
    var sut = new JavaScriptStringEncoder();

    // Act
    string basicLatin = sut.JavaScriptStringEncode("a");
    string latin1 = sut.JavaScriptStringEncode("\u00E9" /* LATIN SMALL LETTER E WITH ACUTE */);
    string symbol = sut.JavaScriptStringEncode("\u2601" /* CLOUD */);

    // Assert: only the Basic Latin letter is expected to come back unescaped.
    Assert.Equal("a", basicLatin);
    Assert.Equal(@"\u00E9", latin1);
    Assert.Equal(@"\u2601", symbol);
}
|
||||
|
||||
[Fact]
public void Default_EquivalentToBasicLatin()
{
    // Arrange
    JavaScriptStringEncoder baseline = new JavaScriptStringEncoder(UnicodeRanges.BasicLatin);
    JavaScriptStringEncoder candidate = JavaScriptStringEncoder.Default;

    // Act & assert: compare the two encoders on every non-surrogate UTF-16 code unit.
    for (int codeUnit = 0; codeUnit <= Char.MaxValue; codeUnit++)
    {
        if (IsSurrogateCodePoint(codeUnit))
        {
            continue; // surrogate code units are not compared by this test
        }

        string text = new String((char)codeUnit, 1);
        string fromBaseline = baseline.JavaScriptStringEncode(text);
        string fromCandidate = candidate.JavaScriptStringEncode(text);
        Assert.Equal(fromBaseline, fromCandidate);
    }
}
|
||||
|
||||
[Fact]
public void Default_ReturnsSingletonInstance()
{
    // Act: read the property twice.
    JavaScriptStringEncoder first = JavaScriptStringEncoder.Default;
    JavaScriptStringEncoder second = JavaScriptStringEncoder.Default;

    // Assert: both reads must hand back the same object reference.
    Assert.Same(first, second);
}
|
||||
|
||||
// Even with every Unicode range allowed, each of these characters is expected in its
// escaped form: \uXXXX for the HTML-sensitive ones, a short escape for the rest.
[Theory]
[InlineData("<", @"\u003C")]
[InlineData(">", @"\u003E")]
[InlineData("&", @"\u0026")]
[InlineData("'", @"\u0027")]
[InlineData("\"", @"\u0022")]
[InlineData("+", @"\u002B")]
[InlineData("\\", @"\\")]
[InlineData("/", @"\/")]
[InlineData("\b", @"\b")]
[InlineData("\f", @"\f")]
[InlineData("\n", @"\n")]
[InlineData("\t", @"\t")]
[InlineData("\r", @"\r")]
public void JavaScriptStringEncode_AllRangesAllowed_StillEncodesForbiddenChars_Simple(string input, string expected)
{
    // Arrange
    var sut = new JavaScriptStringEncoder(UnicodeRanges.All);

    // Act
    string actual = sut.JavaScriptStringEncode(input);

    // Assert
    Assert.Equal(expected, actual);
}
|
||||
|
||||
[Fact]
public void JavaScriptStringEncode_AllRangesAllowed_StillEncodesForbiddenChars_Extended()
{
    // Arrange
    var sut = new JavaScriptStringEncoder(UnicodeRanges.All);

    // Act & assert - every BMP code unit, encoded on its own
    for (int codeUnit = 0; codeUnit <= 0xFFFF; codeUnit++)
    {
        string input = new String((char)codeUnit, 1);
        string expected;

        if (IsSurrogateCodePoint(codeUnit))
        {
            expected = "\uFFFD"; // a lone surrogate becomes the Unicode replacement char
        }
        else
        {
            switch (codeUnit)
            {
                // characters that have a short escape sequence
                case '\b': expected = @"\b"; break;
                case '\t': expected = @"\t"; break;
                case '\n': expected = @"\n"; break;
                case '\f': expected = @"\f"; break;
                case '\r': expected = @"\r"; break;
                case '\\': expected = @"\\"; break;
                case '/': expected = @"\/"; break;

                default:
                    bool htmlSensitive = codeUnit == '<' || codeUnit == '>' || codeUnit == '&'
                        || codeUnit == '\"' || codeUnit == '\'' || codeUnit == '+';
                    bool controlChar = codeUnit <= 0x001F || (0x007F <= codeUnit && codeUnit <= 0x9F);

                    // anything undefined (or otherwise disallowed) must be escaped as well
                    bool mustEncode = htmlSensitive
                        || controlChar
                        || !UnicodeHelpers.IsCharacterDefined((char)codeUnit);

                    expected = mustEncode
                        ? String.Format(CultureInfo.InvariantCulture, @"\u{0:X4}", codeUnit)
                        : input; // no encoding
                    break;
            }
        }

        string actual = sut.JavaScriptStringEncode(input);
        Assert.Equal(expected, actual);
    }

    // Act & assert - astral code points: expected as two \uXXXX escapes, one per UTF-16 code unit
    for (int codePoint = 0x10000; codePoint <= 0x10FFFF; codePoint++)
    {
        string input = Char.ConvertFromUtf32(codePoint);
        uint high = (uint)input[0];
        uint low = (uint)input[1];
        string expected = String.Format(CultureInfo.InvariantCulture, @"\u{0:X4}\u{1:X4}", high, low);

        string actual = sut.JavaScriptStringEncode(input);
        Assert.Equal(expected, actual);
    }
}
|
||||
|
||||
[Fact]
public void JavaScriptStringEncode_BadSurrogates_ReturnsUnicodeReplacementChar()
{
    // Arrange - all code points are allowed, so only surrogate handling is under test
    var sut = new JavaScriptStringEncoder(UnicodeRanges.All);

    // Layout of the input:
    //   a <unpaired leading> b <unpaired trailing> c <trailing before leading>
    //   d <unpaired trailing> <valid pair> e <leading surrogate at end of string>
    const string malformed = "a\uD800b\uDFFFc\uDFFF\uD800d\uDFFF\uD800\uDFFFe\uD800";

    // The valid D800 DFFF pair survives as two escapes; every other surrogate
    // is expected as U+FFFD.
    const string wanted = "a\uFFFDb\uFFFDc\uFFFD\uFFFDd\uFFFD\\uD800\\uDFFFe\uFFFD";

    // Act
    string actual = sut.JavaScriptStringEncode(malformed);

    // Assert
    Assert.Equal(wanted, actual);
}
|
||||
|
||||
[Fact]
public void JavaScriptStringEncode_EmptyStringInput_ReturnsEmptyString()
{
    // Arrange
    var sut = new JavaScriptStringEncoder();

    // Act
    string actual = sut.JavaScriptStringEncode("");

    // Assert
    Assert.Equal("", actual);
}
|
||||
|
||||
// When nothing needs escaping, the encoder must hand back the very same string object.
[Fact]
public void JavaScriptStringEncode_InputDoesNotRequireEncoding_ReturnsOriginalStringInstance()
{
    // Arrange
    var sut = new JavaScriptStringEncoder();
    string untouched = "Hello, there!";

    // Act
    string encoded = sut.JavaScriptStringEncode(untouched);

    // Assert - reference equality, not just value equality
    Assert.Same(untouched, encoded);
}
|
||||
|
||||
// A null input is passed through as null.
[Fact]
public void JavaScriptStringEncode_NullInput_ReturnsNull()
{
    // Arrange
    var sut = new JavaScriptStringEncoder();

    // Act
    string encoded = sut.JavaScriptStringEncode(null);

    // Assert
    Assert.Null(encoded);
}
|
||||
|
||||
// The character that needs escaping ('&') is the first character of the input.
[Fact]
public void JavaScriptStringEncode_WithCharsRequiringEncodingAtBeginning()
{
    const string raw = "&Hello, there!";
    const string wanted = @"\u0026Hello, there!";

    string encoded = new JavaScriptStringEncoder().JavaScriptStringEncode(raw);

    Assert.Equal(wanted, encoded);
}
|
||||
|
||||
// The character that needs escaping ('&') is the last character of the input.
[Fact]
public void JavaScriptStringEncode_WithCharsRequiringEncodingAtEnd()
{
    const string raw = "Hello, there!&";
    const string wanted = @"Hello, there!\u0026";

    string encoded = new JavaScriptStringEncoder().JavaScriptStringEncode(raw);

    Assert.Equal(wanted, encoded);
}
|
||||
|
||||
// The character that needs escaping ('&') sits between characters that do not.
[Fact]
public void JavaScriptStringEncode_WithCharsRequiringEncodingInMiddle()
{
    const string raw = "Hello, &there!";
    const string wanted = @"Hello, \u0026there!";

    string encoded = new JavaScriptStringEncoder().JavaScriptStringEncode(raw);

    Assert.Equal(wanted, encoded);
}
|
||||
|
||||
// Two characters that need escaping ('<' and '>') are separated by plain text.
[Fact]
public void JavaScriptStringEncode_WithCharsRequiringEncodingInterspersed()
{
    const string raw = "Hello, <there>!";
    const string wanted = @"Hello, \u003Cthere\u003E!";

    string encoded = new JavaScriptStringEncoder().JavaScriptStringEncode(raw);

    Assert.Equal(wanted, encoded);
}
|
||||
|
||||
// Encodes a window of a char[] into a TextWriter.
[Fact]
public void JavaScriptStringEncode_CharArray()
{
    // Arrange
    var sut = new JavaScriptStringEncoder();
    var sink = new StringWriter();
    char[] buffer = "Hello+world!".ToCharArray();

    // Act - only the 5 chars starting at index 3 ("lo+wo") are encoded
    sut.JavaScriptStringEncode(buffer, 3, 5, sink);

    // Assert
    string written = sink.ToString();
    Assert.Equal(@"lo\u002Bwo", written);
}
|
||||
|
||||
// Encodes a window of a string into a TextWriter.
[Fact]
public void JavaScriptStringEncode_StringSubstring()
{
    // Arrange
    var sut = new JavaScriptStringEncoder();
    var sink = new StringWriter();
    const string text = "Hello+world!";

    // Act - only the 5 chars starting at index 3 ("lo+wo") are encoded
    sut.JavaScriptStringEncode(text, 3, 5, sink);

    // Assert
    string written = sink.ToString();
    Assert.Equal(@"lo\u002Bwo", written);
}
|
||||
|
||||
// Defense-in-depth from the design document: the encoders never emit a raw
// ' or " so encoded output cannot break out of an HTML attribute. Quotes are
// therefore expected as \u escapes rather than as \' and \".
[Theory]
[InlineData("\"", @"\u0022")]
[InlineData("'", @"\u0027")]
public void JavaScriptStringEncode_Quotes(string input, string expected)
{
    // Arrange
    var sut = new JavaScriptStringEncoder(UnicodeRanges.All);

    // Act
    string actual = sut.JavaScriptStringEncode(input);

    // Assert
    Assert.Equal(expected, actual);
}
|
||||
|
||||
// Defense-in-depth from the design document: JavaScript-encoded output must never
// contain an HTML-sensitive character. This is checked by HTML-encoding the
// JavaScript-encoded text and requiring that the second pass changes nothing.
[Fact]
public void JavaScriptStringEncode_DoesNotOutputHtmlSensitiveCharacters()
{
    // Arrange
    var jsEncoder = new JavaScriptStringEncoder(UnicodeRanges.All);
    var htmlPass = new HtmlEncoder(UnicodeRanges.All);

    // Act & assert - every Unicode scalar value
    for (int codePoint = 0; codePoint <= 0x10FFFF; codePoint++)
    {
        if (!IsSurrogateCodePoint(codePoint)) // surrogates don't matter here
        {
            string scalarText = Char.ConvertFromUtf32(codePoint);
            string afterJs = jsEncoder.JavaScriptStringEncode(scalarText);
            string afterJsThenHtml = htmlPass.HtmlEncode(afterJs);

            // Identical output means nothing HTML-sensitive was left to escape.
            Assert.Equal(afterJs, afterJsThenHtml);
        }
    }
}
|
||||
|
||||
// Returns true when codePoint lies in the UTF-16 surrogate range U+D800..U+DFFF (inclusive).
private static bool IsSurrogateCodePoint(int codePoint)
{
    bool outsideRange = codePoint < 0xD800 || codePoint > 0xDFFF;
    return !outsideRange;
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,17 +0,0 @@
|
|||
// Copyright (c) .NET Foundation. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System;
|
||||
using Newtonsoft.Json;
|
||||
|
||||
namespace Microsoft.Extensions.WebEncoders
|
||||
{
|
||||
/// <summary>
/// JSON-bound data holder with two members: a list of code points and the
/// string they decode to.
/// </summary>
internal sealed class ParsedEntity
{
    /// <summary>Bound to the "codepoints" JSON member.</summary>
    [JsonProperty(PropertyName = "codepoints")]
    public int[] Codepoints { get; set; }

    /// <summary>Bound to the "characters" JSON member.</summary>
    [JsonProperty(PropertyName = "characters")]
    public string DecodedString { get; set; }
}
|
||||
}
|
||||
|
|
@ -1,406 +0,0 @@
|
|||
// Copyright (c) .NET Foundation. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Globalization;
|
||||
using System.IO;
|
||||
using Xunit;
|
||||
|
||||
namespace Microsoft.Extensions.WebEncoders
|
||||
{
|
||||
/// <summary>
/// Tests for <see cref="UnicodeEncoderBase"/>, driven through the nested
/// <c>CustomUnicodeEncoderBase</c>, which writes every encoded scalar as "[U+XXXX]".
/// </summary>
public class UnicodeEncoderBaseTests
{
    // A custom allow list is honored, but control chars, HTML-special chars and
    // forbidden chars are still encoded even when explicitly allowed.
    [Fact]
    public void Ctor_WithCustomFilters()
    {
        // Arrange
        var allowList = new CodePointFilter().AllowChars("ab").AllowChars('\0', '&', '\uFFFF', 'd');
        UnicodeEncoderBase sut = new CustomUnicodeEncoderBase(allowList);

        // Act & assert
        Assert.Equal("a", sut.Encode("a"));
        Assert.Equal("b", sut.Encode("b"));
        Assert.Equal("[U+0063]", sut.Encode("c")); // 'c' was never allowed
        Assert.Equal("d", sut.Encode("d"));
        Assert.Equal("[U+0000]", sut.Encode("\0")); // control chars are always encoded
        Assert.Equal("[U+0026]", sut.Encode("&")); // HTML-special chars are always encoded
        Assert.Equal("[U+FFFF]", sut.Encode("\uFFFF")); // non-chars and other forbidden chars are always encoded
    }

    // Only characters inside the supplied ranges pass through unencoded.
    [Fact]
    public void Ctor_WithUnicodeRanges()
    {
        // Arrange
        var latin1AndSymbols = new CodePointFilter(UnicodeRanges.Latin1Supplement, UnicodeRanges.MiscellaneousSymbols);
        UnicodeEncoderBase sut = new CustomUnicodeEncoderBase(latin1AndSymbols);

        // Act & assert
        Assert.Equal("[U+0061]", sut.Encode("a"));
        Assert.Equal("\u00E9", sut.Encode("\u00E9" /* LATIN SMALL LETTER E WITH ACUTE */));
        Assert.Equal("\u2601", sut.Encode("\u2601" /* CLOUD */));
    }

    // Even with all ranges allowed, < > & ' " + are still encoded.
    [Fact]
    public void Encode_AllRangesAllowed_StillEncodesForbiddenChars_Simple()
    {
        // Arrange
        UnicodeEncoderBase sut = new CustomUnicodeEncoderBase(UnicodeRanges.All);
        const string raw = "Hello <>&\'\"+ there!";
        const string wanted = "Hello [U+003C][U+003E][U+0026][U+0027][U+0022][U+002B] there!";

        // Act
        string actual = sut.Encode(raw);

        // Assert
        Assert.Equal(wanted, actual);
    }

    // Exhaustive sweep over every code point with all ranges allowed.
    [Fact]
    public void Encode_AllRangesAllowed_StillEncodesForbiddenChars_Extended()
    {
        // Arrange
        UnicodeEncoderBase sut = new CustomUnicodeEncoderBase(UnicodeRanges.All);

        // Act & assert - BMP chars
        for (int codePoint = 0; codePoint <= 0xFFFF; codePoint++)
        {
            string single = new String((char)codePoint, 1);
            string wanted;

            if (IsSurrogateCodePoint(codePoint))
            {
                // A lone surrogate is replaced by the Unicode replacement char.
                wanted = "\uFFFD";
            }
            else
            {
                bool isControl = codePoint <= 0x001F || (0x007F <= codePoint && codePoint <= 0x9F);
                bool isHtmlSensitive = "<>&\"'+".IndexOf((char)codePoint) >= 0;

                // Undefined (or otherwise disallowed) chars must be encoded as well.
                bool mustEncode = isControl
                    || !UnicodeHelpers.IsCharacterDefined((char)codePoint)
                    || isHtmlSensitive;

                wanted = mustEncode
                    ? String.Format(CultureInfo.InvariantCulture, "[U+{0:X4}]", codePoint)
                    : single; // no encoding
            }

            string actual = sut.Encode(single);
            Assert.Equal(wanted, actual);
        }

        // Act & assert - astral chars are always encoded
        for (int codePoint = 0x10000; codePoint <= 0x10FFFF; codePoint++)
        {
            string pair = Char.ConvertFromUtf32(codePoint);
            string wanted = String.Format(CultureInfo.InvariantCulture, "[U+{0:X}]", codePoint);

            string actual = sut.Encode(pair);
            Assert.Equal(wanted, actual);
        }
    }

    // Malformed surrogate sequences become U+FFFD; a valid pair is encoded as one scalar.
    [Fact]
    public void Encode_BadSurrogates_ReturnsUnicodeReplacementChar()
    {
        // Arrange - allow all codepoints
        UnicodeEncoderBase sut = new CustomUnicodeEncoderBase(UnicodeRanges.All);

        // Layout of the input:
        //   a <unpaired leading> b <unpaired trailing> c <trailing before leading>
        //   d <unpaired trailing> <valid pair> e <leading surrogate at end of string>
        const string malformed = "a\uD800b\uDFFFc\uDFFF\uD800d\uDFFF\uD800\uDFFFe\uD800";
        const string wanted = "a\uFFFDb\uFFFDc\uFFFD\uFFFDd\uFFFD[U+103FF]e\uFFFD";

        // Act
        string actual = sut.Encode(malformed);

        // Assert
        Assert.Equal(wanted, actual);
    }

    [Fact]
    public void Encode_EmptyStringInput_ReturnsEmptyString()
    {
        // Arrange
        UnicodeEncoderBase sut = new CustomUnicodeEncoderBase(UnicodeRanges.All);

        // Act
        string encoded = sut.Encode("");

        // Assert
        Assert.Equal("", encoded);
    }

    // When nothing needs encoding, the same string instance must be returned.
    [Fact]
    public void Encode_InputDoesNotRequireEncoding_ReturnsOriginalStringInstance()
    {
        // Arrange
        UnicodeEncoderBase sut = new CustomUnicodeEncoderBase(UnicodeRanges.All);
        string untouched = "Hello, there!";

        // Act
        string encoded = sut.Encode(untouched);

        // Assert - reference equality
        Assert.Same(untouched, encoded);
    }

    [Fact]
    public void Encode_NullInput_ReturnsNull()
    {
        // Arrange
        UnicodeEncoderBase sut = new CustomUnicodeEncoderBase(UnicodeRanges.All);

        // Act
        string encoded = sut.Encode(null);

        // Assert
        Assert.Null(encoded);
    }

    [Fact]
    public void Encode_WithCharsRequiringEncodingAtBeginning()
    {
        const string raw = "&Hello, there!";
        const string wanted = "[U+0026]Hello, there!";

        string encoded = new CustomUnicodeEncoderBase(UnicodeRanges.All).Encode(raw);

        Assert.Equal(wanted, encoded);
    }

    [Fact]
    public void Encode_WithCharsRequiringEncodingAtEnd()
    {
        const string raw = "Hello, there!&";
        const string wanted = "Hello, there![U+0026]";

        string encoded = new CustomUnicodeEncoderBase(UnicodeRanges.All).Encode(raw);

        Assert.Equal(wanted, encoded);
    }

    [Fact]
    public void Encode_WithCharsRequiringEncodingInMiddle()
    {
        const string raw = "Hello, &there!";
        const string wanted = "Hello, [U+0026]there!";

        string encoded = new CustomUnicodeEncoderBase(UnicodeRanges.All).Encode(raw);

        Assert.Equal(wanted, encoded);
    }

    [Fact]
    public void Encode_WithCharsRequiringEncodingInterspersed()
    {
        const string raw = "Hello, <there>!";
        const string wanted = "Hello, [U+003C]there[U+003E]!";

        string encoded = new CustomUnicodeEncoderBase(UnicodeRanges.All).Encode(raw);

        Assert.Equal(wanted, encoded);
    }

    // Argument validation of the (char[], startIndex, charCount, TextWriter) overload.
    [Fact]
    public void Encode_CharArray_ParameterChecking_NegativeTestCases()
    {
        // Arrange
        CustomUnicodeEncoderBase sut = new CustomUnicodeEncoderBase();
        char[] abc = "abc".ToCharArray();

        // Act & assert - null arguments
        Assert.Throws<ArgumentNullException>(() => sut.Encode((char[])null, 0, 0, new StringWriter()));
        Assert.Throws<ArgumentNullException>(() => sut.Encode(abc, 0, 3, null));

        // Act & assert - windows that do not fit inside the 3-element array
        Assert.Throws<ArgumentOutOfRangeException>(() => sut.Encode(abc, -1, 2, new StringWriter())); // negative start
        Assert.Throws<ArgumentOutOfRangeException>(() => sut.Encode(abc, 2, 2, new StringWriter())); // start + count > length
        Assert.Throws<ArgumentOutOfRangeException>(() => sut.Encode(abc, 4, 0, new StringWriter())); // start > length
        Assert.Throws<ArgumentOutOfRangeException>(() => sut.Encode(abc, 2, -1, new StringWriter())); // negative count
        Assert.Throws<ArgumentOutOfRangeException>(() => sut.Encode(abc, 1, 3, new StringWriter())); // start + count > length
    }

    // A zero-length window must not touch the writer at all.
    [Fact]
    public void Encode_CharArray_ZeroCount_DoesNotCallIntoTextWriter()
    {
        // Arrange
        var sut = new CustomUnicodeEncoderBase();
        var disposedSink = new StringWriter();
        disposedSink.Dispose(); // Throws ODE if written to.

        // Act
        sut.Encode("abc".ToCharArray(), 2, 0, disposedSink);

        // Assert
        // Reaching this point without the TextWriter throwing is the success condition.
    }

    [Fact]
    public void Encode_CharArray_AllCharsValid()
    {
        // Arrange
        CustomUnicodeEncoderBase sut = new CustomUnicodeEncoderBase(UnicodeRanges.All);
        StringWriter sink = new StringWriter();
        char[] buffer = "abc&xyz".ToCharArray();

        // Act - window is "xy"
        sut.Encode(buffer, 4, 2, sink);

        // Assert
        Assert.Equal("xy", sink.ToString());
    }

    [Fact]
    public void Encode_CharArray_AllCharsInvalid()
    {
        // Arrange - no ranges allowed, so every char is encoded
        CustomUnicodeEncoderBase sut = new CustomUnicodeEncoderBase();
        StringWriter sink = new StringWriter();
        char[] buffer = "abc&xyz".ToCharArray();

        // Act - window is "xy"
        sut.Encode(buffer, 4, 2, sink);

        // Assert
        Assert.Equal("[U+0078][U+0079]", sink.ToString());
    }

    [Fact]
    public void Encode_CharArray_SomeCharsValid()
    {
        // Arrange
        CustomUnicodeEncoderBase sut = new CustomUnicodeEncoderBase(UnicodeRanges.All);
        StringWriter sink = new StringWriter();
        char[] buffer = "abc&xyz".ToCharArray();

        // Act - window is "c&x"
        sut.Encode(buffer, 2, 3, sink);

        // Assert
        Assert.Equal("c[U+0026]x", sink.ToString());
    }

    // Argument validation of the (string, startIndex, charCount, TextWriter) overload.
    [Fact]
    public void Encode_StringSubstring_ParameterChecking_NegativeTestCases()
    {
        // Arrange
        CustomUnicodeEncoderBase sut = new CustomUnicodeEncoderBase();
        const string abc = "abc";

        // Act & assert - null arguments
        Assert.Throws<ArgumentNullException>(() => sut.Encode((string)null, 0, 0, new StringWriter()));
        Assert.Throws<ArgumentNullException>(() => sut.Encode(abc, 0, 3, null));

        // Act & assert - windows that do not fit inside the 3-char string
        Assert.Throws<ArgumentOutOfRangeException>(() => sut.Encode(abc, -1, 2, new StringWriter())); // negative start
        Assert.Throws<ArgumentOutOfRangeException>(() => sut.Encode(abc, 2, 2, new StringWriter())); // start + count > length
        Assert.Throws<ArgumentOutOfRangeException>(() => sut.Encode(abc, 4, 0, new StringWriter())); // start > length
        Assert.Throws<ArgumentOutOfRangeException>(() => sut.Encode(abc, 2, -1, new StringWriter())); // negative count
        Assert.Throws<ArgumentOutOfRangeException>(() => sut.Encode(abc, 1, 3, new StringWriter())); // start + count > length
    }

    // A zero-length window must not touch the writer at all.
    [Fact]
    public void Encode_StringSubstring_ZeroCount_DoesNotCallIntoTextWriter()
    {
        // Arrange
        var sut = new CustomUnicodeEncoderBase();
        var disposedSink = new StringWriter();
        disposedSink.Dispose(); // Throws ODE if written to.

        // Act
        sut.Encode("abc", 2, 0, disposedSink);

        // Assert
        // Reaching this point without the TextWriter throwing is the success condition.
    }

    [Fact]
    public void Encode_StringSubstring_AllCharsValid()
    {
        // Arrange
        CustomUnicodeEncoderBase sut = new CustomUnicodeEncoderBase(UnicodeRanges.All);
        StringWriter sink = new StringWriter();

        // Act - window is "xy"
        sut.Encode("abc&xyz", 4, 2, sink);

        // Assert
        Assert.Equal("xy", sink.ToString());
    }

    [Fact]
    public void Encode_StringSubstring_EntireString_AllCharsValid_ForwardDirectlyToOutput()
    {
        // Arrange
        CustomUnicodeEncoderBase sut = new CustomUnicodeEncoderBase(UnicodeRanges.All);
        StringWriter sink = new StringWriter();

        // Act - the window covers the whole string
        sut.Encode("abc", 0, 3, sink);

        // Assert
        Assert.Equal("abc", sink.ToString());
    }

    [Fact]
    public void Encode_StringSubstring_AllCharsInvalid()
    {
        // Arrange - no ranges allowed, so every char is encoded
        CustomUnicodeEncoderBase sut = new CustomUnicodeEncoderBase();
        StringWriter sink = new StringWriter();

        // Act - window is "xy"
        sut.Encode("abc&xyz", 4, 2, sink);

        // Assert
        Assert.Equal("[U+0078][U+0079]", sink.ToString());
    }

    [Fact]
    public void Encode_StringSubstring_SomeCharsValid()
    {
        // Arrange
        CustomUnicodeEncoderBase sut = new CustomUnicodeEncoderBase(UnicodeRanges.All);
        StringWriter sink = new StringWriter();

        // Act - window is "c&x"
        sut.Encode("abc&xyz", 2, 3, sink);

        // Assert
        Assert.Equal("c[U+0026]x", sink.ToString());
    }

    [Fact]
    public void Encode_StringSubstring_EntireString_SomeCharsValid()
    {
        // Arrange
        CustomUnicodeEncoderBase sut = new CustomUnicodeEncoderBase(UnicodeRanges.All);
        StringWriter sink = new StringWriter();
        const string whole = "abc&xyz";

        // Act - the window covers the whole string
        sut.Encode(whole, 0, whole.Length, sink);

        // Assert
        Assert.Equal("abc[U+0026]xyz", sink.ToString());
    }

    // Returns true when codePoint lies in the UTF-16 surrogate range U+D800..U+DFFF (inclusive).
    private static bool IsSurrogateCodePoint(int codePoint)
    {
        bool outsideRange = codePoint < 0xD800 || codePoint > 0xDFFF;
        return !outsideRange;
    }

    // Filter backed by a fixed list of code points.
    // Not referenced by any test in this class.
    private sealed class CustomCodePointFilter : ICodePointFilter
    {
        private readonly int[] _allowedCodePoints;

        public CustomCodePointFilter(params int[] allowedCodePoints)
        {
            _allowedCodePoints = allowedCodePoints;
        }

        public IEnumerable<int> GetAllowedCodePoints() => _allowedCodePoints;
    }

    // Test encoder that writes each encoded scalar as "[U+XXXX]".
    private sealed class CustomUnicodeEncoderBase : UnicodeEncoderBase
    {
        // The 'max output chars per input char' value of 1 is deliberately wrong:
        // it also checks that the base class copes with an incorrect estimate.
        public CustomUnicodeEncoderBase(CodePointFilter filter)
            : base(filter, maxOutputCharsPerInputChar: 1)
        {
        }

        public CustomUnicodeEncoderBase(params UnicodeRange[] allowedRanges)
            : this(new CodePointFilter(allowedRanges))
        {
        }

        protected override void WriteEncodedScalar(ref Writer writer, uint value)
        {
            // At least four upper-case hex digits, culture-independent.
            string hex = value.ToString("X4", CultureInfo.InvariantCulture);
            writer.Write("[U+" + hex + "]");
        }
    }
}
|
||||
}
|
||||
|
|
@ -1,222 +0,0 @@
|
|||
// Copyright (c) .NET Foundation. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Globalization;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Reflection;
|
||||
using System.Text;
|
||||
using Xunit;
|
||||
|
||||
namespace Microsoft.Extensions.WebEncoders
|
||||
{
|
||||
/// <summary>
/// Tests for <see cref="UnicodeHelpers"/>: the defined-character bitmap, UTF-16 scalar
/// extraction, the packed UTF-8 representation, and <c>IsCharacterDefined</c>, which is
/// checked against the UnicodeData.txt file embedded in this test assembly.
/// </summary>
public unsafe class UnicodeHelpersTests
{
    private const int UnicodeReplacementChar = '\uFFFD';

    private static readonly UTF8Encoding _utf8EncodingThrowOnInvalidBytes = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false, throwOnInvalidBytes: true);

    // Two calls must return the same array instance.
    [Fact]
    public void GetDefinedCharacterBitmap_ReturnsSingletonInstance()
    {
        // Act
        uint[] retVal1 = UnicodeHelpers.GetDefinedCharacterBitmap();
        uint[] retVal2 = UnicodeHelpers.GetDefinedCharacterBitmap();

        // Assert
        Assert.Same(retVal1, retVal2);
    }

    /// <summary>
    /// Theory rows for <see cref="GetScalarValueFromUtf16"/>: (case id, UTF-16 input,
    /// expected scalar value of the first code point in the input).
    /// </summary>
    public static TheoryData<int, string, int> Utf16ScalarValues
    {
        get
        {
            return new TheoryData<int, string, int>
            {
                { 1, "a", (int)'a' }, // normal BMP char, end of string
                { 2, "ab", (int)'a' }, // normal BMP char, not end of string
                { 3, "\uDFFF", UnicodeReplacementChar }, // trailing surrogate, end of string
                { 4, "\uDFFFx", UnicodeReplacementChar }, // trailing surrogate, not end of string
                { 5, "\uD800", UnicodeReplacementChar }, // leading surrogate, end of string
                { 6, "\uD800x", UnicodeReplacementChar }, // leading surrogate, not end of string, followed by non-surrogate
                { 7, "\uD800\uD800", UnicodeReplacementChar }, // leading surrogate, not end of string, followed by leading surrogate
                { 8, "\uD800\uDFFF", 0x103FF }, // leading surrogate, not end of string, followed by trailing surrogate
            };
        }
    }

    [Theory]
    [MemberData(nameof(Utf16ScalarValues))]
    public void GetScalarValueFromUtf16(int unused, string input, int expectedResult)
    {
        // The 'unused' parameter exists because the xunit runner can't distinguish
        // the individual malformed data test cases from each other without this
        // additional identifier.

        // A single-char input means the char under the pointer is the last one.
        bool endOfString = input.Length == 1;

        fixed (char* pInput = input)
        {
            Assert.Equal(expectedResult, UnicodeHelpers.GetScalarValueFromUtf16(pInput, endOfString: endOfString));
        }
    }

    // Compares the packed UTF-8 value with the framework's UTF-8 encoder for every scalar value.
    [Fact]
    public void GetUtf8RepresentationForScalarValue()
    {
        for (int i = 0; i <= 0x10FFFF; i++)
        {
            if (i <= 0xFFFF && Char.IsSurrogate((char)i))
            {
                continue; // no surrogates
            }

            // Arrange
            byte[] expectedUtf8Bytes = _utf8EncodingThrowOnInvalidBytes.GetBytes(Char.ConvertFromUtf32(i));

            // Act - unpack the returned value one byte at a time, lowest byte first;
            // the do/while guarantees at least one byte (needed for U+0000).
            List<byte> actualUtf8Bytes = new List<byte>(4);
            uint asUtf8 = (uint)UnicodeHelpers.GetUtf8RepresentationForScalarValue((uint)i);
            do
            {
                actualUtf8Bytes.Add((byte)asUtf8);
            } while ((asUtf8 >>= 8) != 0);

            // Assert
            Assert.Equal(expectedUtf8Bytes, actualUtf8Bytes);
        }
    }

    // Checks every BMP char against the table derived from UnicodeData.txt and
    // reports all mismatches at once rather than stopping at the first.
    [Fact]
    public void IsCharacterDefined()
    {
        // Arrange
        bool[] definedChars = ReadListOfDefinedCharacters();
        List<string> errors = new List<string>();

        // Act
        for (int i = 0; i <= Char.MaxValue; i++)
        {
            bool expected = definedChars[i];
            bool actual = UnicodeHelpers.IsCharacterDefined((char)i);
            if (expected != actual)
            {
                errors.Add($"Character U+{i:X4}: expected = {expected}, actual = {actual}");
            }
        }

        // Assert
        Assert.True(errors.Count == 0, String.Join(Environment.NewLine, errors));
    }

    /// <summary>
    /// Builds a lookup of BMP code points the encoders treat as "defined", from the
    /// UnicodeData.txt resource embedded in this assembly.
    /// </summary>
    /// <returns>An array of 0x10000 flags indexed by code point; true means allowed.</returns>
    private static bool[] ReadListOfDefinedCharacters()
    {
        HashSet<string> allowedCategories = new HashSet<string>
        {
            // Letters
            "Lu", "Ll", "Lt", "Lm", "Lo",

            // Marks
            "Mn", "Mc", "Me",

            // Numbers
            "Nd", "Nl", "No",

            // Punctuation
            "Pc", "Pd", "Ps", "Pe", "Pi", "Pf", "Po",

            // Symbols
            "Sm", "Sc", "Sk", "So",

            // Separators
            // With the exception of U+0020 SPACE, these aren't allowed

            // Other
            // We only allow one category of 'other' characters
            "Cf",
        };

        HashSet<string> seenCategories = new HashSet<string>();

        bool[] retVal = new bool[0x10000];

        var assembly = typeof(UnicodeHelpersTests).GetTypeInfo().Assembly;
        var resourceName = assembly.GetName().Name + ".UnicodeData.txt";

        // GetManifestResourceStream returns null for an unknown name; fail with a
        // message that names the resource instead of an ArgumentNullException.
        Stream resourceStream = assembly.GetManifestResourceStream(resourceName);
        Assert.True(resourceStream != null, "Embedded resource '" + resourceName + "' was not found.");

        string[] allLines;
        using (var reader = new StreamReader(resourceStream)) // disposing the reader also closes the stream
        {
            allLines = reader.ReadAllLines();
        }

        foreach (string line in allLines)
        {
            // Field 0 = code point (hex), field 1 = name, field 2 = general category
            string[] splitLine = line.Split(';');
            uint codePoint = UInt32.Parse(splitLine[0], NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture);
            string name = splitLine[1];
            if (codePoint >= retVal.Length)
            {
                continue; // don't care about supplementary chars
            }

            if (name.EndsWith(", First>", StringComparison.Ordinal) || name.EndsWith(", Last>", StringComparison.Ordinal))
            {
                // ignore spans - we'll handle them separately
                continue;
            }

            if (codePoint == (uint)' ')
            {
                retVal[codePoint] = true; // we allow U+0020 SPACE as our only valid Zs (whitespace) char
            }
            else
            {
                string category = splitLine[2];
                if (allowedCategories.Contains(category))
                {
                    retVal[codePoint] = true; // chars in this category are allowable
                    seenCategories.Add(category);
                }
            }
        }

        // Handle known spans from Unicode 8.0's UnicodeData.txt
        MarkSpanAsDefined(retVal, '\u3400', '\u4DB5'); // CJK Ideograph Extension A
        MarkSpanAsDefined(retVal, '\u4E00', '\u9FD5'); // CJK Ideograph
        MarkSpanAsDefined(retVal, '\uAC00', '\uD7A3'); // Hangul Syllable

        // Finally, we need to make sure we've seen every category which contains
        // allowed characters. This provides extra defense against having a typo
        // in the list of categories.
        Assert.Equal(allowedCategories.OrderBy(c => c), seenCategories.OrderBy(c => c));

        return retVal;
    }

    // Flags every code point in the inclusive range [first, last] as defined.
    private static void MarkSpanAsDefined(bool[] definedMap, char first, char last)
    {
        for (int i = first; i <= last; i++)
        {
            definedMap[i] = true;
        }
    }
}
|
||||
}
|
||||
|
|
@ -1,69 +0,0 @@
|
|||
// Copyright (c) .NET Foundation. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System;
|
||||
using Xunit;
|
||||
|
||||
namespace Microsoft.Extensions.WebEncoders
|
||||
{
|
||||
/// <summary>
/// Tests for the <see cref="UnicodeRange"/> constructor and
/// <see cref="UnicodeRange.FromSpan"/> factory: argument validation and the
/// resulting <c>FirstCodePoint</c> / <c>RangeSize</c> values.
/// </summary>
public class UnicodeRangeTests
{
    // A first code point below zero or above U+FFFF is rejected.
    [Theory]
    [InlineData(-1, 16)]
    [InlineData(0x10000, 16)]
    public void Ctor_FailureCase_FirstCodePoint(int firstCodePoint, int rangeSize)
    {
        ArgumentOutOfRangeException thrown = Assert.Throws<ArgumentOutOfRangeException>(
            () => new UnicodeRange(firstCodePoint, rangeSize));

        Assert.Equal("firstCodePoint", thrown.ParamName);
    }

    // A negative size, or a size that runs past U+FFFF, is rejected.
    [Theory]
    [InlineData(0x0100, -1)]
    [InlineData(0x0100, 0x10000)]
    public void Ctor_FailureCase_RangeSize(int firstCodePoint, int rangeSize)
    {
        ArgumentOutOfRangeException thrown = Assert.Throws<ArgumentOutOfRangeException>(
            () => new UnicodeRange(firstCodePoint, rangeSize));

        Assert.Equal("rangeSize", thrown.ParamName);
    }

    [Fact]
    public void Ctor_SuccessCase()
    {
        // Act - Latin Extended-A
        var latinExtendedA = new UnicodeRange(0x0100, 128);

        // Assert
        Assert.Equal(0x0100, latinExtendedA.FirstCodePoint);
        Assert.Equal(128, latinExtendedA.RangeSize);
    }

    // The last char of a span may not precede the first.
    [Fact]
    public void FromSpan_FailureCase()
    {
        ArgumentOutOfRangeException thrown = Assert.Throws<ArgumentOutOfRangeException>(
            () => UnicodeRange.FromSpan('\u0020', '\u0010'));

        Assert.Equal("lastChar", thrown.ParamName);
    }

    [Fact]
    public void FromSpan_SuccessCase()
    {
        // Act - Latin Extended-B
        var latinExtendedB = UnicodeRange.FromSpan('\u0180', '\u024F');

        // Assert - both ends of the span are inclusive, hence 208 code points
        Assert.Equal(0x0180, latinExtendedB.FirstCodePoint);
        Assert.Equal(208, latinExtendedB.RangeSize);
    }

    [Fact]
    public void FromSpan_SuccessCase_All()
    {
        // Act - the full span U+0000..U+FFFF
        var everything = UnicodeRange.FromSpan('\u0000', '\uFFFF');

        // Assert
        Assert.Equal(0, everything.FirstCodePoint);
        Assert.Equal(0x10000, everything.RangeSize);
    }
}
|
||||
}
|
||||
|
|
@ -1,211 +0,0 @@
|
|||
// Copyright (c) .NET Foundation. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System;
|
||||
using System.Reflection;
|
||||
using Xunit;
|
||||
|
||||
namespace Microsoft.Extensions.WebEncoders
|
||||
{
|
||||
public class UnicodeRangesTests
|
||||
{
|
||||
// UnicodeRanges.None is an empty range and is served from a cache.
[Fact]
public void Range_None()
{
    UnicodeRange firstRead = UnicodeRanges.None;
    Assert.NotNull(firstRead);

    // Test 1: the range should be empty
    Assert.Equal(0, firstRead.FirstCodePoint);
    Assert.Equal(0, firstRead.RangeSize);

    // Test 2: reading the property again should return the same cached instance
    UnicodeRange secondRead = UnicodeRanges.None;
    Assert.Same(firstRead, secondRead);
}
|
||||
|
||||
// UnicodeRanges.All is checked against the span U+0000..U+FFFF via the shared Range_Unicode helper.
[Fact]
public void Range_All()
{
    const char first = '\u0000';
    const char last = '\uFFFF';

    Range_Unicode(first, last, nameof(UnicodeRanges.All));
}
|
||||
|
||||
[Theory]
|
||||
[InlineData('\u0000', '\u007F', nameof(UnicodeRanges.BasicLatin))]
|
||||
[InlineData('\u0080', '\u00FF', nameof(UnicodeRanges.Latin1Supplement))]
|
||||
[InlineData('\u0100', '\u017F', nameof(UnicodeRanges.LatinExtendedA))]
|
||||
[InlineData('\u0180', '\u024F', nameof(UnicodeRanges.LatinExtendedB))]
|
||||
[InlineData('\u0250', '\u02AF', nameof(UnicodeRanges.IPAExtensions))]
|
||||
[InlineData('\u02B0', '\u02FF', nameof(UnicodeRanges.SpacingModifierLetters))]
|
||||
[InlineData('\u0300', '\u036F', nameof(UnicodeRanges.CombiningDiacriticalMarks))]
|
||||
[InlineData('\u0370', '\u03FF', nameof(UnicodeRanges.GreekandCoptic))]
|
||||
[InlineData('\u0400', '\u04FF', nameof(UnicodeRanges.Cyrillic))]
|
||||
[InlineData('\u0500', '\u052F', nameof(UnicodeRanges.CyrillicSupplement))]
|
||||
[InlineData('\u0530', '\u058F', nameof(UnicodeRanges.Armenian))]
|
||||
[InlineData('\u0590', '\u05FF', nameof(UnicodeRanges.Hebrew))]
|
||||
[InlineData('\u0600', '\u06FF', nameof(UnicodeRanges.Arabic))]
|
||||
[InlineData('\u0700', '\u074F', nameof(UnicodeRanges.Syriac))]
|
||||
[InlineData('\u0750', '\u077F', nameof(UnicodeRanges.ArabicSupplement))]
|
||||
[InlineData('\u0780', '\u07BF', nameof(UnicodeRanges.Thaana))]
|
||||
[InlineData('\u07C0', '\u07FF', nameof(UnicodeRanges.NKo))]
|
||||
[InlineData('\u0800', '\u083F', nameof(UnicodeRanges.Samaritan))]
|
||||
[InlineData('\u0840', '\u085F', nameof(UnicodeRanges.Mandaic))]
|
||||
[InlineData('\u08A0', '\u08FF', nameof(UnicodeRanges.ArabicExtendedA))]
|
||||
[InlineData('\u0900', '\u097F', nameof(UnicodeRanges.Devanagari))]
|
||||
[InlineData('\u0980', '\u09FF', nameof(UnicodeRanges.Bengali))]
|
||||
[InlineData('\u0A00', '\u0A7F', nameof(UnicodeRanges.Gurmukhi))]
|
||||
[InlineData('\u0A80', '\u0AFF', nameof(UnicodeRanges.Gujarati))]
|
||||
[InlineData('\u0B00', '\u0B7F', nameof(UnicodeRanges.Oriya))]
|
||||
[InlineData('\u0B80', '\u0BFF', nameof(UnicodeRanges.Tamil))]
|
||||
[InlineData('\u0C00', '\u0C7F', nameof(UnicodeRanges.Telugu))]
|
||||
[InlineData('\u0C80', '\u0CFF', nameof(UnicodeRanges.Kannada))]
|
||||
[InlineData('\u0D00', '\u0D7F', nameof(UnicodeRanges.Malayalam))]
|
||||
[InlineData('\u0D80', '\u0DFF', nameof(UnicodeRanges.Sinhala))]
|
||||
[InlineData('\u0E00', '\u0E7F', nameof(UnicodeRanges.Thai))]
|
||||
[InlineData('\u0E80', '\u0EFF', nameof(UnicodeRanges.Lao))]
|
||||
[InlineData('\u0F00', '\u0FFF', nameof(UnicodeRanges.Tibetan))]
|
||||
[InlineData('\u1000', '\u109F', nameof(UnicodeRanges.Myanmar))]
|
||||
[InlineData('\u10A0', '\u10FF', nameof(UnicodeRanges.Georgian))]
|
||||
[InlineData('\u1100', '\u11FF', nameof(UnicodeRanges.HangulJamo))]
|
||||
[InlineData('\u1200', '\u137F', nameof(UnicodeRanges.Ethiopic))]
|
||||
[InlineData('\u1380', '\u139F', nameof(UnicodeRanges.EthiopicSupplement))]
|
||||
[InlineData('\u13A0', '\u13FF', nameof(UnicodeRanges.Cherokee))]
|
||||
[InlineData('\u1400', '\u167F', nameof(UnicodeRanges.UnifiedCanadianAboriginalSyllabics))]
|
||||
[InlineData('\u1680', '\u169F', nameof(UnicodeRanges.Ogham))]
|
||||
[InlineData('\u16A0', '\u16FF', nameof(UnicodeRanges.Runic))]
|
||||
[InlineData('\u1700', '\u171F', nameof(UnicodeRanges.Tagalog))]
|
||||
[InlineData('\u1720', '\u173F', nameof(UnicodeRanges.Hanunoo))]
|
||||
[InlineData('\u1740', '\u175F', nameof(UnicodeRanges.Buhid))]
|
||||
[InlineData('\u1760', '\u177F', nameof(UnicodeRanges.Tagbanwa))]
|
||||
[InlineData('\u1780', '\u17FF', nameof(UnicodeRanges.Khmer))]
|
||||
[InlineData('\u1800', '\u18AF', nameof(UnicodeRanges.Mongolian))]
|
||||
[InlineData('\u18B0', '\u18FF', nameof(UnicodeRanges.UnifiedCanadianAboriginalSyllabicsExtended))]
|
||||
[InlineData('\u1900', '\u194F', nameof(UnicodeRanges.Limbu))]
|
||||
[InlineData('\u1950', '\u197F', nameof(UnicodeRanges.TaiLe))]
|
||||
[InlineData('\u1980', '\u19DF', nameof(UnicodeRanges.NewTaiLue))]
|
||||
[InlineData('\u19E0', '\u19FF', nameof(UnicodeRanges.KhmerSymbols))]
|
||||
[InlineData('\u1A00', '\u1A1F', nameof(UnicodeRanges.Buginese))]
|
||||
[InlineData('\u1A20', '\u1AAF', nameof(UnicodeRanges.TaiTham))]
|
||||
[InlineData('\u1AB0', '\u1AFF', nameof(UnicodeRanges.CombiningDiacriticalMarksExtended))]
|
||||
[InlineData('\u1B00', '\u1B7F', nameof(UnicodeRanges.Balinese))]
|
||||
[InlineData('\u1B80', '\u1BBF', nameof(UnicodeRanges.Sundanese))]
|
||||
[InlineData('\u1BC0', '\u1BFF', nameof(UnicodeRanges.Batak))]
|
||||
[InlineData('\u1C00', '\u1C4F', nameof(UnicodeRanges.Lepcha))]
|
||||
[InlineData('\u1C50', '\u1C7F', nameof(UnicodeRanges.OlChiki))]
|
||||
[InlineData('\u1CC0', '\u1CCF', nameof(UnicodeRanges.SundaneseSupplement))]
|
||||
[InlineData('\u1CD0', '\u1CFF', nameof(UnicodeRanges.VedicExtensions))]
|
||||
[InlineData('\u1D00', '\u1D7F', nameof(UnicodeRanges.PhoneticExtensions))]
|
||||
[InlineData('\u1D80', '\u1DBF', nameof(UnicodeRanges.PhoneticExtensionsSupplement))]
|
||||
[InlineData('\u1DC0', '\u1DFF', nameof(UnicodeRanges.CombiningDiacriticalMarksSupplement))]
|
||||
[InlineData('\u1E00', '\u1EFF', nameof(UnicodeRanges.LatinExtendedAdditional))]
|
||||
[InlineData('\u1F00', '\u1FFF', nameof(UnicodeRanges.GreekExtended))]
|
||||
[InlineData('\u2000', '\u206F', nameof(UnicodeRanges.GeneralPunctuation))]
|
||||
[InlineData('\u2070', '\u209F', nameof(UnicodeRanges.SuperscriptsandSubscripts))]
|
||||
[InlineData('\u20A0', '\u20CF', nameof(UnicodeRanges.CurrencySymbols))]
|
||||
[InlineData('\u20D0', '\u20FF', nameof(UnicodeRanges.CombiningDiacriticalMarksforSymbols))]
|
||||
[InlineData('\u2100', '\u214F', nameof(UnicodeRanges.LetterlikeSymbols))]
|
||||
[InlineData('\u2150', '\u218F', nameof(UnicodeRanges.NumberForms))]
|
||||
[InlineData('\u2190', '\u21FF', nameof(UnicodeRanges.Arrows))]
|
||||
[InlineData('\u2200', '\u22FF', nameof(UnicodeRanges.MathematicalOperators))]
|
||||
[InlineData('\u2300', '\u23FF', nameof(UnicodeRanges.MiscellaneousTechnical))]
|
||||
[InlineData('\u2400', '\u243F', nameof(UnicodeRanges.ControlPictures))]
|
||||
[InlineData('\u2440', '\u245F', nameof(UnicodeRanges.OpticalCharacterRecognition))]
|
||||
[InlineData('\u2460', '\u24FF', nameof(UnicodeRanges.EnclosedAlphanumerics))]
|
||||
[InlineData('\u2500', '\u257F', nameof(UnicodeRanges.BoxDrawing))]
|
||||
[InlineData('\u2580', '\u259F', nameof(UnicodeRanges.BlockElements))]
|
||||
[InlineData('\u25A0', '\u25FF', nameof(UnicodeRanges.GeometricShapes))]
|
||||
[InlineData('\u2600', '\u26FF', nameof(UnicodeRanges.MiscellaneousSymbols))]
|
||||
[InlineData('\u2700', '\u27BF', nameof(UnicodeRanges.Dingbats))]
|
||||
[InlineData('\u27C0', '\u27EF', nameof(UnicodeRanges.MiscellaneousMathematicalSymbolsA))]
|
||||
[InlineData('\u27F0', '\u27FF', nameof(UnicodeRanges.SupplementalArrowsA))]
|
||||
[InlineData('\u2800', '\u28FF', nameof(UnicodeRanges.BraillePatterns))]
|
||||
[InlineData('\u2900', '\u297F', nameof(UnicodeRanges.SupplementalArrowsB))]
|
||||
[InlineData('\u2980', '\u29FF', nameof(UnicodeRanges.MiscellaneousMathematicalSymbolsB))]
|
||||
[InlineData('\u2A00', '\u2AFF', nameof(UnicodeRanges.SupplementalMathematicalOperators))]
|
||||
[InlineData('\u2B00', '\u2BFF', nameof(UnicodeRanges.MiscellaneousSymbolsandArrows))]
|
||||
[InlineData('\u2C00', '\u2C5F', nameof(UnicodeRanges.Glagolitic))]
|
||||
[InlineData('\u2C60', '\u2C7F', nameof(UnicodeRanges.LatinExtendedC))]
|
||||
[InlineData('\u2C80', '\u2CFF', nameof(UnicodeRanges.Coptic))]
|
||||
[InlineData('\u2D00', '\u2D2F', nameof(UnicodeRanges.GeorgianSupplement))]
|
||||
[InlineData('\u2D30', '\u2D7F', nameof(UnicodeRanges.Tifinagh))]
|
||||
[InlineData('\u2D80', '\u2DDF', nameof(UnicodeRanges.EthiopicExtended))]
|
||||
[InlineData('\u2DE0', '\u2DFF', nameof(UnicodeRanges.CyrillicExtendedA))]
|
||||
[InlineData('\u2E00', '\u2E7F', nameof(UnicodeRanges.SupplementalPunctuation))]
|
||||
[InlineData('\u2E80', '\u2EFF', nameof(UnicodeRanges.CJKRadicalsSupplement))]
|
||||
[InlineData('\u2F00', '\u2FDF', nameof(UnicodeRanges.KangxiRadicals))]
|
||||
[InlineData('\u2FF0', '\u2FFF', nameof(UnicodeRanges.IdeographicDescriptionCharacters))]
|
||||
[InlineData('\u3000', '\u303F', nameof(UnicodeRanges.CJKSymbolsandPunctuation))]
|
||||
[InlineData('\u3040', '\u309F', nameof(UnicodeRanges.Hiragana))]
|
||||
[InlineData('\u30A0', '\u30FF', nameof(UnicodeRanges.Katakana))]
|
||||
[InlineData('\u3100', '\u312F', nameof(UnicodeRanges.Bopomofo))]
|
||||
[InlineData('\u3130', '\u318F', nameof(UnicodeRanges.HangulCompatibilityJamo))]
|
||||
[InlineData('\u3190', '\u319F', nameof(UnicodeRanges.Kanbun))]
|
||||
[InlineData('\u31A0', '\u31BF', nameof(UnicodeRanges.BopomofoExtended))]
|
||||
[InlineData('\u31C0', '\u31EF', nameof(UnicodeRanges.CJKStrokes))]
|
||||
[InlineData('\u31F0', '\u31FF', nameof(UnicodeRanges.KatakanaPhoneticExtensions))]
|
||||
[InlineData('\u3200', '\u32FF', nameof(UnicodeRanges.EnclosedCJKLettersandMonths))]
|
||||
[InlineData('\u3300', '\u33FF', nameof(UnicodeRanges.CJKCompatibility))]
|
||||
[InlineData('\u3400', '\u4DBF', nameof(UnicodeRanges.CJKUnifiedIdeographsExtensionA))]
|
||||
[InlineData('\u4DC0', '\u4DFF', nameof(UnicodeRanges.YijingHexagramSymbols))]
|
||||
[InlineData('\u4E00', '\u9FFF', nameof(UnicodeRanges.CJKUnifiedIdeographs))]
|
||||
[InlineData('\uA000', '\uA48F', nameof(UnicodeRanges.YiSyllables))]
|
||||
[InlineData('\uA490', '\uA4CF', nameof(UnicodeRanges.YiRadicals))]
|
||||
[InlineData('\uA4D0', '\uA4FF', nameof(UnicodeRanges.Lisu))]
|
||||
[InlineData('\uA500', '\uA63F', nameof(UnicodeRanges.Vai))]
|
||||
[InlineData('\uA640', '\uA69F', nameof(UnicodeRanges.CyrillicExtendedB))]
|
||||
[InlineData('\uA6A0', '\uA6FF', nameof(UnicodeRanges.Bamum))]
|
||||
[InlineData('\uA700', '\uA71F', nameof(UnicodeRanges.ModifierToneLetters))]
|
||||
[InlineData('\uA720', '\uA7FF', nameof(UnicodeRanges.LatinExtendedD))]
|
||||
[InlineData('\uA800', '\uA82F', nameof(UnicodeRanges.SylotiNagri))]
|
||||
[InlineData('\uA830', '\uA83F', nameof(UnicodeRanges.CommonIndicNumberForms))]
|
||||
[InlineData('\uA840', '\uA87F', nameof(UnicodeRanges.Phagspa))]
|
||||
[InlineData('\uA880', '\uA8DF', nameof(UnicodeRanges.Saurashtra))]
|
||||
[InlineData('\uA8E0', '\uA8FF', nameof(UnicodeRanges.DevanagariExtended))]
|
||||
[InlineData('\uA900', '\uA92F', nameof(UnicodeRanges.KayahLi))]
|
||||
[InlineData('\uA930', '\uA95F', nameof(UnicodeRanges.Rejang))]
|
||||
[InlineData('\uA960', '\uA97F', nameof(UnicodeRanges.HangulJamoExtendedA))]
|
||||
[InlineData('\uA980', '\uA9DF', nameof(UnicodeRanges.Javanese))]
|
||||
[InlineData('\uA9E0', '\uA9FF', nameof(UnicodeRanges.MyanmarExtendedB))]
|
||||
[InlineData('\uAA00', '\uAA5F', nameof(UnicodeRanges.Cham))]
|
||||
[InlineData('\uAA60', '\uAA7F', nameof(UnicodeRanges.MyanmarExtendedA))]
|
||||
[InlineData('\uAA80', '\uAADF', nameof(UnicodeRanges.TaiViet))]
|
||||
[InlineData('\uAAE0', '\uAAFF', nameof(UnicodeRanges.MeeteiMayekExtensions))]
|
||||
[InlineData('\uAB00', '\uAB2F', nameof(UnicodeRanges.EthiopicExtendedA))]
|
||||
[InlineData('\uAB30', '\uAB6F', nameof(UnicodeRanges.LatinExtendedE))]
|
||||
[InlineData('\uAB70', '\uABBF', nameof(UnicodeRanges.CherokeeSupplement))]
|
||||
[InlineData('\uABC0', '\uABFF', nameof(UnicodeRanges.MeeteiMayek))]
|
||||
[InlineData('\uAC00', '\uD7AF', nameof(UnicodeRanges.HangulSyllables))]
|
||||
[InlineData('\uD7B0', '\uD7FF', nameof(UnicodeRanges.HangulJamoExtendedB))]
|
||||
[InlineData('\uF900', '\uFAFF', nameof(UnicodeRanges.CJKCompatibilityIdeographs))]
|
||||
[InlineData('\uFB00', '\uFB4F', nameof(UnicodeRanges.AlphabeticPresentationForms))]
|
||||
[InlineData('\uFB50', '\uFDFF', nameof(UnicodeRanges.ArabicPresentationFormsA))]
|
||||
[InlineData('\uFE00', '\uFE0F', nameof(UnicodeRanges.VariationSelectors))]
|
||||
[InlineData('\uFE10', '\uFE1F', nameof(UnicodeRanges.VerticalForms))]
|
||||
[InlineData('\uFE20', '\uFE2F', nameof(UnicodeRanges.CombiningHalfMarks))]
|
||||
[InlineData('\uFE30', '\uFE4F', nameof(UnicodeRanges.CJKCompatibilityForms))]
|
||||
[InlineData('\uFE50', '\uFE6F', nameof(UnicodeRanges.SmallFormVariants))]
|
||||
[InlineData('\uFE70', '\uFEFF', nameof(UnicodeRanges.ArabicPresentationFormsB))]
|
||||
[InlineData('\uFF00', '\uFFEF', nameof(UnicodeRanges.HalfwidthandFullwidthForms))]
|
||||
[InlineData('\uFFF0', '\uFFFF', nameof(UnicodeRanges.Specials))]
|
||||
public void Range_Unicode(char first, char last, string blockName)
{
    // Sanity-check the test data itself before touching the code under test.
    Assert.Equal(0x0, first & 0xF); // a block always starts at U+nnn0
    Assert.Equal(0xF, last & 0xF); // a block always ends at U+nnnF
    Assert.True(first < last); // the bounds must be in ascending order

    // Look up the public static property on UnicodeRanges whose name matches the block.
    const BindingFlags PublicStatic = BindingFlags.Public | BindingFlags.Static;
    PropertyInfo blockProperty = typeof(UnicodeRanges).GetProperty(blockName, PublicStatic);
    Assert.NotNull(blockProperty);

    UnicodeRange firstRead = (UnicodeRange)blockProperty.GetValue(null);
    Assert.NotNull(firstRead);

    // Check 1: the range covers exactly first..last (inclusive).
    Assert.Equal(first, firstRead.FirstCodePoint);
    int lastCodePointInRange = firstRead.FirstCodePoint + firstRead.RangeSize - 1;
    Assert.Equal(last, lastCodePointInRange);

    // Check 2: reading the property again yields the very same instance.
    UnicodeRange secondRead = (UnicodeRange)blockProperty.GetValue(null);
    Assert.Same(firstRead, secondRead);
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,301 +0,0 @@
|
|||
// Copyright (c) .NET Foundation. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System;
|
||||
using System.Globalization;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using Xunit;
|
||||
|
||||
namespace Microsoft.Extensions.WebEncoders
|
||||
{
|
||||
public class UrlEncoderTests
|
||||
{
|
||||
// Strict UTF-8 encoding (no byte-order mark, throws on invalid input) used to build known-good percent-encoded values.
// Marked readonly because the shared instance is assigned once here and never replaced.
private static readonly UTF8Encoding _utf8EncodingThrowOnInvalidBytes = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false, throwOnInvalidBytes: true);
|
||||
|
||||
[Fact]
public void Ctor_WithCodePointFilter()
{
    // Arrange: allow 'a' and 'b' via a string, then NUL, '&', U+FFFF and 'd' individually.
    var lettersAllowed = new CodePointFilter().AllowChars("ab");
    var filter = lettersAllowed.AllowChars('\0', '&', '\uFFFF', 'd');
    var encoder = new UrlEncoder(filter);

    // Act & assert: explicitly allowed letters pass through, 'c' was never allowed.
    Assert.Equal("a", encoder.UrlEncode("a"));
    Assert.Equal("b", encoder.UrlEncode("b"));
    Assert.Equal("%63", encoder.UrlEncode("c"));
    Assert.Equal("d", encoder.UrlEncode("d"));

    // Act & assert: these stay encoded even though the filter allowed them.
    Assert.Equal("%00", encoder.UrlEncode("\0")); // control chars are always encoded
    Assert.Equal("%26", encoder.UrlEncode("&")); // HTML-special chars are always encoded
    Assert.Equal("%EF%BF%BF", encoder.UrlEncode("\uFFFF")); // non-chars and other forbidden chars are always encoded
}
|
||||
|
||||
[Fact]
public void Ctor_WithUnicodeRanges()
{
    // Arrange: only Latin-1 Supplement and Miscellaneous Symbols are allowed (Basic Latin is not).
    var encoder = new UrlEncoder(UnicodeRanges.Latin1Supplement, UnicodeRanges.MiscellaneousSymbols);

    // Act & assert: 'a' lies outside the allowed ranges and is percent-encoded.
    Assert.Equal("%61", encoder.UrlEncode("a"));

    // Act & assert: characters inside the allowed ranges are emitted unchanged.
    Assert.Equal("\u00E9", encoder.UrlEncode("\u00E9" /* LATIN SMALL LETTER E WITH ACUTE */));
    Assert.Equal("\u2601", encoder.UrlEncode("\u2601" /* CLOUD */));
}
|
||||
|
||||
[Fact]
public void Ctor_WithNoParameters_DefaultsToBasicLatin()
{
    // Arrange: no ranges supplied.
    var encoder = new UrlEncoder();

    // Act & assert: a Basic Latin letter passes through untouched.
    Assert.Equal("a", encoder.UrlEncode("a"));

    // Act & assert: characters beyond Basic Latin are emitted as percent-encoded UTF-8.
    Assert.Equal("%C3%A9", encoder.UrlEncode("\u00E9" /* LATIN SMALL LETTER E WITH ACUTE */));
    Assert.Equal("%E2%98%81", encoder.UrlEncode("\u2601" /* CLOUD */));
}
|
||||
|
||||
[Fact]
public void Default_EquivalentToBasicLatin()
{
    // Arrange: an explicit Basic Latin encoder acts as the reference for the default instance.
    var reference = new UrlEncoder(UnicodeRanges.BasicLatin);
    var defaultEncoder = UrlEncoder.Default;

    // Act & assert: both encoders must agree on every non-surrogate UTF-16 code unit.
    for (int codeUnit = 0; codeUnit <= Char.MaxValue; codeUnit++)
    {
        if (IsSurrogateCodePoint(codeUnit))
        {
            continue; // surrogate code units are not compared by this test
        }

        string singleChar = new String((char)codeUnit, 1);
        Assert.Equal(reference.UrlEncode(singleChar), defaultEncoder.UrlEncode(singleChar));
    }
}
|
||||
|
||||
[Fact]
public void Default_ReturnsSingletonInstance()
{
    // Act & assert: two consecutive reads of the property hand back the same object.
    Assert.Same(UrlEncoder.Default, UrlEncoder.Default);
}
|
||||
|
||||
[Fact]
public void UrlEncode_AllRangesAllowed_StillEncodesForbiddenChars()
{
    // Even with every Unicode range allowed, the encoder must still percent-encode
    // everything outside the allow-list computed below.

    // Arrange
    UrlEncoder encoder = new UrlEncoder(UnicodeRanges.All);

    // Act & assert - BMP chars
    for (int i = 0; i <= 0xFFFF; i++)
    {
        string input = new String((char)i, 1);
        string expected;
        if (IsSurrogateCodePoint(i))
        {
            expected = "%EF%BF%BD"; // unpaired surrogate -> Unicode replacement char
        }
        else
        {
            bool mustEncode = true;

            // RFC 3987, Sec. 2.2 gives the list of allowed chars
            // (We allow 'ipchar' except for "'", "&", "+", "%", and "=")
            if (('a' <= i && i <= 'z') || ('A' <= i && i <= 'Z') || ('0' <= i && i <= '9'))
            {
                mustEncode = false; // ALPHA / DIGIT
            }
            else if ((0x00A0 <= i && i <= 0xD7FF) || (0xF900 <= i && i <= 0xFDCF) || (0xFDF0 <= i && i <= 0xFFEF))
            {
                // 'ucschar': allowed only when the character is actually defined
                mustEncode = !UnicodeHelpers.IsCharacterDefined((char)i);
            }
            else
            {
                switch (i)
                {
                    // iunreserved
                    case '-':
                    case '.':
                    case '_':
                    case '~':

                    // isegment-nz-nc
                    case '@':

                    // sub-delims
                    case '!':
                    case '$':
                    case '(':
                    case ')':
                    case '*':
                    case ',':
                    case ';':
                        mustEncode = false;
                        break;
                }
            }

            if (mustEncode)
            {
                expected = GetKnownGoodPercentEncodedValue(i);
            }
            else
            {
                expected = input; // no encoding
            }
        }

        string retVal = encoder.UrlEncode(input);
        Assert.Equal(expected, retVal);
    }

    // Act & assert - astral chars (every supplementary code point is expected to be percent-encoded)
    for (int i = 0x10000; i <= 0x10FFFF; i++)
    {
        string input = Char.ConvertFromUtf32(i);
        string expected = GetKnownGoodPercentEncodedValue(i);
        string retVal = encoder.UrlEncode(input);
        Assert.Equal(expected, retVal);
    }
}
|
||||
|
||||
[Fact]
public void UrlEncode_BadSurrogates_ReturnsUnicodeReplacementChar()
{
    // Arrange: every code point is allowed, so only surrogate handling is under test.
    var encoder = new UrlEncoder(UnicodeRanges.All);

    // Layout of the input:
    // "a<unpaired leading>b<unpaired trailing>c<trailing before leading>d<unpaired trailing><valid>e<high at end of string>"
    const string malformed = "a\uD800b\uDFFFc\uDFFF\uD800d\uDFFF\uD800\uDFFFe\uD800";

    // Each bad surrogate becomes %EF%BF%BD; the valid 'D800' 'DFFF' pair is kept and emitted as %F0%90%8F%BF.
    const string wellFormed = "a%EF%BF%BDb%EF%BF%BDc%EF%BF%BD%EF%BF%BDd%EF%BF%BD%F0%90%8F%BFe%EF%BF%BD";

    // Act & assert
    Assert.Equal(wellFormed, encoder.UrlEncode(malformed));
}
|
||||
|
||||
[Fact]
public void UrlEncode_EmptyStringInput_ReturnsEmptyString()
{
    // Arrange
    var encoder = new UrlEncoder();

    // Act
    string encoded = encoder.UrlEncode("");

    // Assert: an empty input produces an empty output.
    Assert.Equal("", encoded);
}
|
||||
|
||||
[Fact]
public void UrlEncode_InputDoesNotRequireEncoding_ReturnsOriginalStringInstance()
{
    // Arrange: the test expects this input to come back unmodified.
    var encoder = new UrlEncoder();
    string alreadySafe = "Hello,there!";

    // Act
    string encoded = encoder.UrlEncode(alreadySafe);

    // Assert: reference equality, i.e. the very same string object is returned.
    Assert.Same(alreadySafe, encoded);
}
|
||||
|
||||
[Fact]
public void UrlEncode_NullInput_ReturnsNull()
{
    // Arrange
    var encoder = new UrlEncoder();

    // Act
    string encoded = encoder.UrlEncode(null);

    // Assert: null goes in, null comes out.
    Assert.Null(encoded);
}
|
||||
|
||||
[Fact]
public void UrlEncode_WithCharsRequiringEncodingAtBeginning()
{
    // Arrange
    var encoder = new UrlEncoder();

    // Act: the only character needing encoding ('&') is the first one.
    string encoded = encoder.UrlEncode("&Hello,there!");

    // Assert
    Assert.Equal(@"%26Hello,there!", encoded);
}
|
||||
|
||||
[Fact]
public void UrlEncode_WithCharsRequiringEncodingAtEnd()
{
    // Arrange
    var encoder = new UrlEncoder();

    // Act: the only character needing encoding ('&') is the last one.
    string encoded = encoder.UrlEncode("Hello,there!&");

    // Assert
    Assert.Equal(@"Hello,there!%26", encoded);
}
|
||||
|
||||
[Fact]
public void UrlEncode_WithCharsRequiringEncodingInMiddle()
{
    // Arrange
    var encoder = new UrlEncoder();

    // Act: the space and '&' that need encoding sit next to each other mid-string.
    string encoded = encoder.UrlEncode("Hello, &there!");

    // Assert
    Assert.Equal(@"Hello,%20%26there!", encoded);
}
|
||||
|
||||
[Fact]
public void UrlEncode_WithCharsRequiringEncodingInterspersed()
{
    // Arrange
    var encoder = new UrlEncoder();

    // Act: characters needing encoding (space, '<', '>') are separated by safe characters.
    string encoded = encoder.UrlEncode("Hello, <there>!");

    // Assert
    Assert.Equal(@"Hello,%20%3Cthere%3E!", encoded);
}
|
||||
|
||||
[Fact]
public void UrlEncode_CharArray()
{
    // Arrange
    var encoder = new UrlEncoder();
    StringWriter sink = new StringWriter();
    char[] source = "Hello+world!".ToCharArray();

    // Act: encode only the 5-character slice starting at index 3 ("lo+wo").
    encoder.UrlEncode(source, 3, 5, sink);

    // Assert: just the slice is written, with '+' percent-encoded.
    Assert.Equal("lo%2Bwo", sink.ToString());
}
|
||||
|
||||
[Fact]
public void UrlEncode_StringSubstring()
{
    // Arrange
    var encoder = new UrlEncoder();
    StringWriter sink = new StringWriter();

    // Act: encode only the 5-character slice starting at index 3 ("lo+wo").
    encoder.UrlEncode("Hello+world!", 3, 5, sink);

    // Assert: just the slice is written, with '+' percent-encoded.
    Assert.Equal("lo%2Bwo", sink.ToString());
}
|
||||
|
||||
[Fact]
public void UrlEncode_DoesNotOutputHtmlSensitiveCharacters()
{
    // Defense-in-depth required by the design document: URL-encoded output
    // must never contain an unescaped HTML-sensitive character.

    // Arrange: both encoders allow every range.
    var urlEncoder = new UrlEncoder(UnicodeRanges.All);
    var htmlEncoder = new HtmlEncoder(UnicodeRanges.All);

    // Act & assert: HTML-encoding the URL-encoded form must be a no-op for every scalar value.
    const int LastCodePoint = 0x10FFFF;
    for (int codePoint = 0; codePoint <= LastCodePoint; codePoint++)
    {
        if (!IsSurrogateCodePoint(codePoint)) // surrogates don't matter here
        {
            string urlEncoded = urlEncoder.UrlEncode(Char.ConvertFromUtf32(codePoint));
            string htmlEncodedAfterwards = htmlEncoder.HtmlEncode(urlEncoded);

            // Equal strings mean the HTML encoder changed nothing in the URL-encoded text.
            Assert.Equal(urlEncoded, htmlEncodedAfterwards);
        }
    }
}
|
||||
|
||||
// Builds the reference percent-encoding of a code point: code point -> UTF-16 string ->
// UTF-8 bytes -> "%XX" per byte (uppercase hex, invariant culture).
private static string GetKnownGoodPercentEncodedValue(int codePoint)
{
    string utf16 = Char.ConvertFromUtf32(codePoint);
    byte[] utf8Bytes = _utf8EncodingThrowOnInvalidBytes.GetBytes(utf16);

    var percentEncoded = new StringBuilder(utf8Bytes.Length * 3);
    foreach (byte octet in utf8Bytes)
    {
        percentEncoded.AppendFormat(CultureInfo.InvariantCulture, "%{0:X2}", octet);
    }

    return percentEncoded.ToString();
}
|
||||
|
||||
// Returns true when the value falls inside the UTF-16 surrogate range U+D800..U+DFFF (inclusive).
private static bool IsSurrogateCodePoint(int codePoint)
{
    if (codePoint < 0xD800)
    {
        return false; // below the first surrogate
    }

    return codePoint <= 0xDFFF; // at or below the last surrogate
}
|
||||
}
|
||||
}
|
||||
|
|
@ -18,6 +18,5 @@
|
|||
"System.Text.Encoding.Extensions": "4.0.11-beta-*"
|
||||
}
|
||||
}
|
||||
},
|
||||
"resource": "..\\..\\unicode\\UnicodeData.txt"
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,298 +0,0 @@
|
|||
# Blocks-8.0.0.txt
|
||||
# Date: 2014-11-10, 23:04:00 GMT [KW]
|
||||
#
|
||||
# Unicode Character Database
|
||||
# Copyright (c) 1991-2014 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
#
|
||||
# Format:
|
||||
# Start Code..End Code; Block Name
|
||||
|
||||
# ================================================
|
||||
|
||||
# Note: When comparing block names, casing, whitespace, hyphens,
|
||||
# and underbars are ignored.
|
||||
# For example, "Latin Extended-A" and "latin extended a" are equivalent.
|
||||
# For more information on the comparison of property values,
|
||||
# see UAX #44: http://www.unicode.org/reports/tr44/
|
||||
#
|
||||
# All block ranges start with a value where (cp MOD 16) = 0,
|
||||
# and end with a value where (cp MOD 16) = 15. In other words,
|
||||
# the last hexadecimal digit of the start of range is ...0
|
||||
# and the last hexadecimal digit of the end of range is ...F.
|
||||
# This constraint on block ranges guarantees that allocations
|
||||
# are done in terms of whole columns, and that code chart display
|
||||
# never involves splitting columns in the charts.
|
||||
#
|
||||
# All code points not explicitly listed for Block
|
||||
# have the value No_Block.
|
||||
|
||||
# Property: Block
|
||||
#
|
||||
# @missing: 0000..10FFFF; No_Block
|
||||
|
||||
0000..007F; Basic Latin
|
||||
0080..00FF; Latin-1 Supplement
|
||||
0100..017F; Latin Extended-A
|
||||
0180..024F; Latin Extended-B
|
||||
0250..02AF; IPA Extensions
|
||||
02B0..02FF; Spacing Modifier Letters
|
||||
0300..036F; Combining Diacritical Marks
|
||||
0370..03FF; Greek and Coptic
|
||||
0400..04FF; Cyrillic
|
||||
0500..052F; Cyrillic Supplement
|
||||
0530..058F; Armenian
|
||||
0590..05FF; Hebrew
|
||||
0600..06FF; Arabic
|
||||
0700..074F; Syriac
|
||||
0750..077F; Arabic Supplement
|
||||
0780..07BF; Thaana
|
||||
07C0..07FF; NKo
|
||||
0800..083F; Samaritan
|
||||
0840..085F; Mandaic
|
||||
08A0..08FF; Arabic Extended-A
|
||||
0900..097F; Devanagari
|
||||
0980..09FF; Bengali
|
||||
0A00..0A7F; Gurmukhi
|
||||
0A80..0AFF; Gujarati
|
||||
0B00..0B7F; Oriya
|
||||
0B80..0BFF; Tamil
|
||||
0C00..0C7F; Telugu
|
||||
0C80..0CFF; Kannada
|
||||
0D00..0D7F; Malayalam
|
||||
0D80..0DFF; Sinhala
|
||||
0E00..0E7F; Thai
|
||||
0E80..0EFF; Lao
|
||||
0F00..0FFF; Tibetan
|
||||
1000..109F; Myanmar
|
||||
10A0..10FF; Georgian
|
||||
1100..11FF; Hangul Jamo
|
||||
1200..137F; Ethiopic
|
||||
1380..139F; Ethiopic Supplement
|
||||
13A0..13FF; Cherokee
|
||||
1400..167F; Unified Canadian Aboriginal Syllabics
|
||||
1680..169F; Ogham
|
||||
16A0..16FF; Runic
|
||||
1700..171F; Tagalog
|
||||
1720..173F; Hanunoo
|
||||
1740..175F; Buhid
|
||||
1760..177F; Tagbanwa
|
||||
1780..17FF; Khmer
|
||||
1800..18AF; Mongolian
|
||||
18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
|
||||
1900..194F; Limbu
|
||||
1950..197F; Tai Le
|
||||
1980..19DF; New Tai Lue
|
||||
19E0..19FF; Khmer Symbols
|
||||
1A00..1A1F; Buginese
|
||||
1A20..1AAF; Tai Tham
|
||||
1AB0..1AFF; Combining Diacritical Marks Extended
|
||||
1B00..1B7F; Balinese
|
||||
1B80..1BBF; Sundanese
|
||||
1BC0..1BFF; Batak
|
||||
1C00..1C4F; Lepcha
|
||||
1C50..1C7F; Ol Chiki
|
||||
1CC0..1CCF; Sundanese Supplement
|
||||
1CD0..1CFF; Vedic Extensions
|
||||
1D00..1D7F; Phonetic Extensions
|
||||
1D80..1DBF; Phonetic Extensions Supplement
|
||||
1DC0..1DFF; Combining Diacritical Marks Supplement
|
||||
1E00..1EFF; Latin Extended Additional
|
||||
1F00..1FFF; Greek Extended
|
||||
2000..206F; General Punctuation
|
||||
2070..209F; Superscripts and Subscripts
|
||||
20A0..20CF; Currency Symbols
|
||||
20D0..20FF; Combining Diacritical Marks for Symbols
|
||||
2100..214F; Letterlike Symbols
|
||||
2150..218F; Number Forms
|
||||
2190..21FF; Arrows
|
||||
2200..22FF; Mathematical Operators
|
||||
2300..23FF; Miscellaneous Technical
|
||||
2400..243F; Control Pictures
|
||||
2440..245F; Optical Character Recognition
|
||||
2460..24FF; Enclosed Alphanumerics
|
||||
2500..257F; Box Drawing
|
||||
2580..259F; Block Elements
|
||||
25A0..25FF; Geometric Shapes
|
||||
2600..26FF; Miscellaneous Symbols
|
||||
2700..27BF; Dingbats
|
||||
27C0..27EF; Miscellaneous Mathematical Symbols-A
|
||||
27F0..27FF; Supplemental Arrows-A
|
||||
2800..28FF; Braille Patterns
|
||||
2900..297F; Supplemental Arrows-B
|
||||
2980..29FF; Miscellaneous Mathematical Symbols-B
|
||||
2A00..2AFF; Supplemental Mathematical Operators
|
||||
2B00..2BFF; Miscellaneous Symbols and Arrows
|
||||
2C00..2C5F; Glagolitic
|
||||
2C60..2C7F; Latin Extended-C
|
||||
2C80..2CFF; Coptic
|
||||
2D00..2D2F; Georgian Supplement
|
||||
2D30..2D7F; Tifinagh
|
||||
2D80..2DDF; Ethiopic Extended
|
||||
2DE0..2DFF; Cyrillic Extended-A
|
||||
2E00..2E7F; Supplemental Punctuation
|
||||
2E80..2EFF; CJK Radicals Supplement
|
||||
2F00..2FDF; Kangxi Radicals
|
||||
2FF0..2FFF; Ideographic Description Characters
|
||||
3000..303F; CJK Symbols and Punctuation
|
||||
3040..309F; Hiragana
|
||||
30A0..30FF; Katakana
|
||||
3100..312F; Bopomofo
|
||||
3130..318F; Hangul Compatibility Jamo
|
||||
3190..319F; Kanbun
|
||||
31A0..31BF; Bopomofo Extended
|
||||
31C0..31EF; CJK Strokes
|
||||
31F0..31FF; Katakana Phonetic Extensions
|
||||
3200..32FF; Enclosed CJK Letters and Months
|
||||
3300..33FF; CJK Compatibility
|
||||
3400..4DBF; CJK Unified Ideographs Extension A
|
||||
4DC0..4DFF; Yijing Hexagram Symbols
|
||||
4E00..9FFF; CJK Unified Ideographs
|
||||
A000..A48F; Yi Syllables
|
||||
A490..A4CF; Yi Radicals
|
||||
A4D0..A4FF; Lisu
|
||||
A500..A63F; Vai
|
||||
A640..A69F; Cyrillic Extended-B
|
||||
A6A0..A6FF; Bamum
|
||||
A700..A71F; Modifier Tone Letters
|
||||
A720..A7FF; Latin Extended-D
|
||||
A800..A82F; Syloti Nagri
|
||||
A830..A83F; Common Indic Number Forms
|
||||
A840..A87F; Phags-pa
|
||||
A880..A8DF; Saurashtra
|
||||
A8E0..A8FF; Devanagari Extended
|
||||
A900..A92F; Kayah Li
|
||||
A930..A95F; Rejang
|
||||
A960..A97F; Hangul Jamo Extended-A
|
||||
A980..A9DF; Javanese
|
||||
A9E0..A9FF; Myanmar Extended-B
|
||||
AA00..AA5F; Cham
|
||||
AA60..AA7F; Myanmar Extended-A
|
||||
AA80..AADF; Tai Viet
|
||||
AAE0..AAFF; Meetei Mayek Extensions
|
||||
AB00..AB2F; Ethiopic Extended-A
|
||||
AB30..AB6F; Latin Extended-E
|
||||
AB70..ABBF; Cherokee Supplement
|
||||
ABC0..ABFF; Meetei Mayek
|
||||
AC00..D7AF; Hangul Syllables
|
||||
D7B0..D7FF; Hangul Jamo Extended-B
|
||||
D800..DB7F; High Surrogates
|
||||
DB80..DBFF; High Private Use Surrogates
|
||||
DC00..DFFF; Low Surrogates
|
||||
E000..F8FF; Private Use Area
|
||||
F900..FAFF; CJK Compatibility Ideographs
|
||||
FB00..FB4F; Alphabetic Presentation Forms
|
||||
FB50..FDFF; Arabic Presentation Forms-A
|
||||
FE00..FE0F; Variation Selectors
|
||||
FE10..FE1F; Vertical Forms
|
||||
FE20..FE2F; Combining Half Marks
|
||||
FE30..FE4F; CJK Compatibility Forms
|
||||
FE50..FE6F; Small Form Variants
|
||||
FE70..FEFF; Arabic Presentation Forms-B
|
||||
FF00..FFEF; Halfwidth and Fullwidth Forms
|
||||
FFF0..FFFF; Specials
|
||||
10000..1007F; Linear B Syllabary
|
||||
10080..100FF; Linear B Ideograms
|
||||
10100..1013F; Aegean Numbers
|
||||
10140..1018F; Ancient Greek Numbers
|
||||
10190..101CF; Ancient Symbols
|
||||
101D0..101FF; Phaistos Disc
|
||||
10280..1029F; Lycian
|
||||
102A0..102DF; Carian
|
||||
102E0..102FF; Coptic Epact Numbers
|
||||
10300..1032F; Old Italic
|
||||
10330..1034F; Gothic
|
||||
10350..1037F; Old Permic
|
||||
10380..1039F; Ugaritic
|
||||
103A0..103DF; Old Persian
|
||||
10400..1044F; Deseret
|
||||
10450..1047F; Shavian
|
||||
10480..104AF; Osmanya
|
||||
10500..1052F; Elbasan
|
||||
10530..1056F; Caucasian Albanian
|
||||
10600..1077F; Linear A
|
||||
10800..1083F; Cypriot Syllabary
|
||||
10840..1085F; Imperial Aramaic
|
||||
10860..1087F; Palmyrene
|
||||
10880..108AF; Nabataean
|
||||
108E0..108FF; Hatran
|
||||
10900..1091F; Phoenician
|
||||
10920..1093F; Lydian
|
||||
10980..1099F; Meroitic Hieroglyphs
|
||||
109A0..109FF; Meroitic Cursive
|
||||
10A00..10A5F; Kharoshthi
|
||||
10A60..10A7F; Old South Arabian
|
||||
10A80..10A9F; Old North Arabian
|
||||
10AC0..10AFF; Manichaean
|
||||
10B00..10B3F; Avestan
|
||||
10B40..10B5F; Inscriptional Parthian
|
||||
10B60..10B7F; Inscriptional Pahlavi
|
||||
10B80..10BAF; Psalter Pahlavi
|
||||
10C00..10C4F; Old Turkic
|
||||
10C80..10CFF; Old Hungarian
|
||||
10E60..10E7F; Rumi Numeral Symbols
|
||||
11000..1107F; Brahmi
|
||||
11080..110CF; Kaithi
|
||||
110D0..110FF; Sora Sompeng
|
||||
11100..1114F; Chakma
|
||||
11150..1117F; Mahajani
|
||||
11180..111DF; Sharada
|
||||
111E0..111FF; Sinhala Archaic Numbers
|
||||
11200..1124F; Khojki
|
||||
11280..112AF; Multani
|
||||
112B0..112FF; Khudawadi
|
||||
11300..1137F; Grantha
|
||||
11480..114DF; Tirhuta
|
||||
11580..115FF; Siddham
|
||||
11600..1165F; Modi
|
||||
11680..116CF; Takri
|
||||
11700..1173F; Ahom
|
||||
118A0..118FF; Warang Citi
|
||||
11AC0..11AFF; Pau Cin Hau
|
||||
12000..123FF; Cuneiform
|
||||
12400..1247F; Cuneiform Numbers and Punctuation
|
||||
12480..1254F; Early Dynastic Cuneiform
|
||||
13000..1342F; Egyptian Hieroglyphs
|
||||
14400..1467F; Anatolian Hieroglyphs
|
||||
16800..16A3F; Bamum Supplement
|
||||
16A40..16A6F; Mro
|
||||
16AD0..16AFF; Bassa Vah
|
||||
16B00..16B8F; Pahawh Hmong
|
||||
16F00..16F9F; Miao
|
||||
1B000..1B0FF; Kana Supplement
|
||||
1BC00..1BC9F; Duployan
|
||||
1BCA0..1BCAF; Shorthand Format Controls
|
||||
1D000..1D0FF; Byzantine Musical Symbols
|
||||
1D100..1D1FF; Musical Symbols
|
||||
1D200..1D24F; Ancient Greek Musical Notation
|
||||
1D300..1D35F; Tai Xuan Jing Symbols
|
||||
1D360..1D37F; Counting Rod Numerals
|
||||
1D400..1D7FF; Mathematical Alphanumeric Symbols
|
||||
1D800..1DAAF; Sutton SignWriting
|
||||
1E800..1E8DF; Mende Kikakui
|
||||
1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
|
||||
1F000..1F02F; Mahjong Tiles
|
||||
1F030..1F09F; Domino Tiles
|
||||
1F0A0..1F0FF; Playing Cards
|
||||
1F100..1F1FF; Enclosed Alphanumeric Supplement
|
||||
1F200..1F2FF; Enclosed Ideographic Supplement
|
||||
1F300..1F5FF; Miscellaneous Symbols and Pictographs
|
||||
1F600..1F64F; Emoticons
|
||||
1F650..1F67F; Ornamental Dingbats
|
||||
1F680..1F6FF; Transport and Map Symbols
|
||||
1F700..1F77F; Alchemical Symbols
|
||||
1F780..1F7FF; Geometric Shapes Extended
|
||||
1F800..1F8FF; Supplemental Arrows-C
|
||||
1F900..1F9FF; Supplemental Symbols and Pictographs
|
||||
20000..2A6DF; CJK Unified Ideographs Extension B
|
||||
2A700..2B73F; CJK Unified Ideographs Extension C
|
||||
2B740..2B81F; CJK Unified Ideographs Extension D
|
||||
2B820..2CEAF; CJK Unified Ideographs Extension E
|
||||
2F800..2FA1F; CJK Compatibility Ideographs Supplement
|
||||
E0000..E007F; Tags
|
||||
E0100..E01EF; Variation Selectors Supplement
|
||||
F0000..FFFFF; Supplementary Private Use Area-A
|
||||
100000..10FFFF; Supplementary Private Use Area-B
|
||||
|
||||
# EOF
|
||||
|
|
@ -1,6 +0,0 @@
|
|||
<?xml version="1.0" encoding="utf-8" ?>
|
||||
<configuration>
|
||||
<startup>
|
||||
<supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.5.1" />
|
||||
</startup>
|
||||
</configuration>
|
||||
|
|
@ -1,61 +0,0 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
|
||||
<PropertyGroup>
|
||||
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
|
||||
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
|
||||
<ProjectGuid>{0E87CEC9-46CE-4B6B-A613-93AA773C10A4}</ProjectGuid>
|
||||
<OutputType>Exe</OutputType>
|
||||
<AppDesignerFolder>Properties</AppDesignerFolder>
|
||||
<RootNamespace>DefinedCharListGenerator</RootNamespace>
|
||||
<AssemblyName>DefinedCharListGenerator</AssemblyName>
|
||||
<TargetFrameworkVersion>v4.5.1</TargetFrameworkVersion>
|
||||
<FileAlignment>512</FileAlignment>
|
||||
<AutoGenerateBindingRedirects>true</AutoGenerateBindingRedirects>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
|
||||
<PlatformTarget>AnyCPU</PlatformTarget>
|
||||
<DebugSymbols>true</DebugSymbols>
|
||||
<DebugType>full</DebugType>
|
||||
<Optimize>false</Optimize>
|
||||
<OutputPath>bin\Debug\</OutputPath>
|
||||
<DefineConstants>DEBUG;TRACE</DefineConstants>
|
||||
<ErrorReport>prompt</ErrorReport>
|
||||
<WarningLevel>4</WarningLevel>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
|
||||
<PlatformTarget>AnyCPU</PlatformTarget>
|
||||
<DebugType>pdbonly</DebugType>
|
||||
<Optimize>true</Optimize>
|
||||
<OutputPath>bin\Release\</OutputPath>
|
||||
<DefineConstants>TRACE</DefineConstants>
|
||||
<ErrorReport>prompt</ErrorReport>
|
||||
<WarningLevel>4</WarningLevel>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<Reference Include="System" />
|
||||
<Reference Include="System.Core" />
|
||||
<Reference Include="Microsoft.CSharp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Compile Include="Program.cs" />
|
||||
<Compile Include="Properties\AssemblyInfo.cs" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="App.config" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Content Include="..\..\UnicodeData.txt">
|
||||
<Link>UnicodeData.txt</Link>
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</Content>
|
||||
</ItemGroup>
|
||||
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
|
||||
<!-- To modify your build process, add your task inside one of the targets below and uncomment it.
|
||||
Other similar extension points exist, see Microsoft.Common.targets.
|
||||
<Target Name="BeforeBuild">
|
||||
</Target>
|
||||
<Target Name="AfterBuild">
|
||||
</Target>
|
||||
-->
|
||||
</Project>
|
||||
|
|
@ -1,180 +0,0 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.Globalization;
|
||||
using System.IO;
|
||||
|
||||
namespace DefinedCharListGenerator
|
||||
{
|
||||
/// <summary>
/// This program outputs the 'unicode-defined-chars.bin' bitmap file.
/// </summary>
/// <remarks>
/// The input is read from 'UnicodeData.txt' and the output is written to
/// 'unicode-defined-chars.bin'. Both are relative paths, so they resolve against
/// the current working directory of the process.
/// </remarks>
class Program
{
    // Unicode range is U+0000 .. U+10FFFF.
    private const uint MaxUnicodeChar = 0x10FFFF;

    // Number of code points in the BMP (U+0000 .. U+FFFF); only these are written out.
    private const int BmpCodePointCount = 0x10000;

    // Suffixes that mark the two endpoints of a span entry such as "<Low Surrogate, First>".
    private const string SpanFirstSuffix = ", First>";
    private const string SpanLastSuffix = ", Last>";

    // General categories whose code points are recorded as "defined" in the bitmap.
    // Zs (space separators) is deliberately absent; U+0020 SPACE is special-cased in Main.
    private static readonly HashSet<string> _allowedCategories = new HashSet<string>(StringComparer.Ordinal)
    {
        "Lu", "Ll", "Lt", "Lm", "Lo",               /* letters */
        "Mn", "Mc", "Me",                           /* marks */
        "Nd", "Nl", "No",                           /* numbers */
        "Pc", "Pd", "Ps", "Pe", "Pi", "Pf", "Po",   /* punctuation */
        "Sm", "Sc", "Sk", "So",                     /* symbols */
        "Cf"                                        /* other */
    };

    /// <summary>
    /// Parses 'UnicodeData.txt' and writes the BMP "defined character" bitmap.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    /// <exception cref="InvalidDataException">
    /// The input contains a line with fewer than three fields, a span end marker
    /// without a preceding start marker, or a span that is never closed.
    /// </exception>
    static void Main(string[] args)
    {
        // The input file should be UnicodeData.txt from the UCD corresponding to the
        // version of the Unicode spec we're consuming.
        // More info: http://www.unicode.org/reports/tr44/tr44-14.html#UCD_Files
        // Latest UnicodeData.txt: http://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt

        bool[] definedChars = new bool[MaxUnicodeChar + 1];
        Dictionary<string, Span> spans = new Dictionary<string, Span>();

        // Each line is a semicolon-delimited list of information:
        // <value>;<name>;<category>;...
        foreach (string line in File.ReadAllLines("UnicodeData.txt"))
        {
            // Tolerate stray blank lines instead of failing inside uint.Parse.
            if (string.IsNullOrWhiteSpace(line))
            {
                continue;
            }

            string[] splitLine = line.Split(new char[] { ';' }, 4);
            if (splitLine.Length < 3)
            {
                throw new InvalidDataException("Malformed UnicodeData.txt line: " + line);
            }

            uint codepoint = uint.Parse(splitLine[0], NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture);
            string rawName = splitLine[1];
            string category = splitLine[2];

            // Spans go into their own dictionary for later processing.
            string spanName;
            bool isStartOfSpan;
            if (IsSpanDefinition(rawName, out spanName, out isStartOfSpan))
            {
                if (isStartOfSpan)
                {
                    spans.Add(spanName, new Span() { FirstCodePoint = codepoint, Category = category });
                }
                else
                {
                    Span existingSpan;
                    if (!spans.TryGetValue(spanName, out existingSpan))
                    {
                        throw new InvalidDataException(
                            "End of span '" + spanName + "' was seen before its start.");
                    }

                    Debug.Assert(existingSpan.FirstCodePoint != 0, "We should've seen the start of this span already.");
                    Debug.Assert(existingSpan.LastCodePoint == 0, "We shouldn't have seen the end of this span already.");
                    Debug.Assert(existingSpan.Category == category, "Span start Unicode category doesn't match span end Unicode category.");
                    existingSpan.LastCodePoint = codepoint;
                }
                continue;
            }

            // We only allow certain categories of code points.
            // Zs (space separators) aren't included, but we allow U+0020 SPACE as a special case.
            if (codepoint != (uint)' ' && !IsAllowedUnicodeCategory(category))
            {
                continue;
            }

            Debug.Assert(codepoint <= MaxUnicodeChar);
            definedChars[codepoint] = true;
        }

        // Next, populate characters that weren't defined on their own lines
        // but which are instead defined as members of a named span.
        foreach (KeyValuePair<string, Span> entry in spans)
        {
            Span span = entry.Value;

            // A span whose end marker never appeared still has LastCodePoint == 0.
            // Fail loudly rather than silently leaving its characters out of the bitmap.
            if (span.LastCodePoint < span.FirstCodePoint)
            {
                throw new InvalidDataException(
                    "Span '" + entry.Key + "' has a start marker but no matching end marker.");
            }

            if (!IsAllowedUnicodeCategory(span.Category))
            {
                continue;
            }

            Debug.Assert(span.FirstCodePoint <= MaxUnicodeChar);
            Debug.Assert(span.LastCodePoint <= MaxUnicodeChar);
            for (uint i = span.FirstCodePoint; i <= span.LastCodePoint; i++)
            {
                definedChars[i] = true;
            }
        }

        // Finally, write the list of defined characters out as a bitmap.
        // Each consecutive block of 8 chars is written as a single byte.
        // For instance, the first byte of the output file contains the
        // bitmap for the following codepoints:
        // - (bit 7) U+0007 [MSB]
        // - (bit 6) U+0006
        // - (bit 5) U+0005
        // - (bit 4) U+0004
        // - (bit 3) U+0003
        // - (bit 2) U+0002
        // - (bit 1) U+0001
        // - (bit 0) U+0000 [LSB]
        // The next byte will contain the bitmap for U+000F to U+0008,
        // and so on until the last byte, which is U+FFFF to U+FFF8.
        // We're only concerned about the BMP (U+0000 .. U+FFFF) for now.
        byte[] bitmap = new byte[BmpCodePointCount / 8];
        for (int i = 0; i < BmpCodePointCount; i++)
        {
            if (definedChars[i])
            {
                // Byte index is i / 8; bit index within that byte is i % 8.
                bitmap[i >> 3] |= (byte)(1 << (i & 7));
            }
        }

        File.WriteAllBytes("unicode-defined-chars.bin", bitmap);
    }

    /// <summary>
    /// Determines whether code points of the given Unicode general category
    /// are recorded as defined characters.
    /// </summary>
    /// <param name="category">Two-letter general category abbreviation, e.g. "Lu".</param>
    /// <returns><c>true</c> if the category is one of the allowed categories; otherwise <c>false</c>.</returns>
    private static bool IsAllowedUnicodeCategory(string category)
    {
        // We only allow certain classes of characters.
        return _allowedCategories.Contains(category);
    }

    /// <summary>
    /// Determines whether a UnicodeData.txt name field is one endpoint of a span.
    /// </summary>
    /// <param name="rawName">The name field (second column) of a UnicodeData.txt line.</param>
    /// <param name="spanName">
    /// On success, the span name with the angle brackets and the ", First" / ", Last"
    /// suffix removed; otherwise <c>null</c>.
    /// </param>
    /// <param name="isStartOfSpan">
    /// On success, <c>true</c> for a "First" entry and <c>false</c> for a "Last" entry;
    /// <c>false</c> when the method returns <c>false</c>.
    /// </param>
    /// <returns><c>true</c> if <paramref name="rawName"/> is a span endpoint; otherwise <c>false</c>.</returns>
    private static bool IsSpanDefinition(string rawName, out string spanName, out bool isStartOfSpan)
    {
        // Spans are represented within angle brackets, such as the following:
        // DC00;<Low Surrogate, First>;Cs;0;L;;;;;N;;;;;
        // DFFF;<Low Surrogate, Last>;Cs;0;L;;;;;N;;;;;
        if (rawName.StartsWith("<", StringComparison.Ordinal))
        {
            if (rawName.EndsWith(SpanFirstSuffix, StringComparison.Ordinal))
            {
                // Skip the leading '<' and drop the suffix.
                spanName = rawName.Substring(1, rawName.Length - 1 - SpanFirstSuffix.Length);
                isStartOfSpan = true;
                return true;
            }

            if (rawName.EndsWith(SpanLastSuffix, StringComparison.Ordinal))
            {
                spanName = rawName.Substring(1, rawName.Length - 1 - SpanLastSuffix.Length);
                isStartOfSpan = false;
                return true;
            }
        }

        // Not surrounded by <>, or <control> or some other non-span.
        spanName = null;
        isStartOfSpan = false;
        return false;
    }

    /// <summary>
    /// A contiguous run of code points declared by a First/Last pair in UnicodeData.txt.
    /// </summary>
    private class Span
    {
        // Code point taken from the "First" entry.
        public uint FirstCodePoint;

        // Code point taken from the "Last" entry; stays 0 until that entry is seen.
        public uint LastCodePoint;

        // General category taken from the "First" entry.
        public string Category;
    }
}
|
||||
}
|
||||
|
|
@ -1,36 +0,0 @@
|
|||
using System.Reflection;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
// Identity of the DefinedCharListGenerator assembly.
[assembly: AssemblyTitle("DefinedCharListGenerator")]
[assembly: AssemblyProduct("DefinedCharListGenerator")]

// Descriptive metadata; intentionally left blank apart from the copyright line.
[assembly: AssemblyDescription("")]
[assembly: AssemblyConfiguration("")]
[assembly: AssemblyCompany("")]
[assembly: AssemblyCopyright("Copyright © 2015")]
[assembly: AssemblyTrademark("")]
[assembly: AssemblyCulture("")]

// Types in this assembly are hidden from COM by default. To expose a single type,
// apply ComVisible(true) to that type.
[assembly: ComVisible(false)]

// ID of the type library if this project is ever exposed to COM.
[assembly: Guid("5089f890-38f7-413c-87b0-d8eb1e238ef5")]

// Version numbers have the form Major.Minor.Build.Revision.
// The Build and Revision parts can be defaulted with '*', for example:
// [assembly: AssemblyVersion("1.0.*")]
[assembly: AssemblyVersion("1.0.0.0")]
[assembly: AssemblyFileVersion("1.0.0.0")]
|
||||
|
|
@ -1,28 +0,0 @@
|
|||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio 2013
|
||||
VisualStudioVersion = 12.0.31101.0
|
||||
MinimumVisualStudioVersion = 10.0.40219.1
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DefinedCharListGenerator", "DefinedCharListGenerator\DefinedCharListGenerator.csproj", "{0E87CEC9-46CE-4B6B-A613-93AA773C10A4}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "UnicodeTablesGenerator", "UnicodeTablesGenerator\UnicodeTablesGenerator.csproj", "{3D181114-6946-4D34-A3B9-0F83B6B8FEAE}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Any CPU = Debug|Any CPU
|
||||
Release|Any CPU = Release|Any CPU
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{0E87CEC9-46CE-4B6B-A613-93AA773C10A4}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{0E87CEC9-46CE-4B6B-A613-93AA773C10A4}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{0E87CEC9-46CE-4B6B-A613-93AA773C10A4}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{0E87CEC9-46CE-4B6B-A613-93AA773C10A4}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{3D181114-6946-4D34-A3B9-0F83B6B8FEAE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{3D181114-6946-4D34-A3B9-0F83B6B8FEAE}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{3D181114-6946-4D34-A3B9-0F83B6B8FEAE}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{3D181114-6946-4D34-A3B9-0F83B6B8FEAE}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
EndGlobal
|
||||
|
|
@ -1,6 +0,0 @@
|
|||
<?xml version="1.0" encoding="utf-8" ?>
|
||||
<configuration>
|
||||
<startup>
|
||||
<supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.5.1" />
|
||||
</startup>
|
||||
</configuration>
|
||||
|
|
@ -1,103 +0,0 @@
|
|||
using System;
|
||||
using System.Globalization;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace UnicodeTablesGenerator
|
||||
{
|
||||
/// <summary>
/// This program outputs the 'UnicodeRanges.generated.txt' and
/// 'UnicodeRangesTests.generated.txt' source files.
/// </summary>
/// <remarks>
/// The generated files require some hand-tweaking. For instance, you'll need
/// to remove surrogates and private use blocks. The files can then be merged
/// into the *.generated.cs files as appropriate.
/// Only blocks whose start and end code points are written with exactly four hex
/// digits are emitted; lines for blocks beyond U+FFFF do not match the parsing
/// pattern and are skipped.
/// </remarks>
class Program
{
    // Template for one UnicodeRange property plus its backing field.
    // {0} = block name, {1} = first code point, {2} = last code point,
    // {3} = property name, {4} = backing field name (without the leading underscore).
    private const string _codePointFiltersGeneratedFormat = @"
/// <summary>
/// A <see cref=""UnicodeRange""/> corresponding to the '{0}' Unicode block (U+{1}..U+{2}).
/// </summary>
/// <remarks>
/// See http://www.unicode.org/charts/PDF/U{1}.pdf for the full set of characters in this block.
/// </remarks>
public static UnicodeRange {3} => Volatile.Read(ref _{4}) ?? CreateRange(ref _{4}, first: '\u{1}', last: '\u{2}');
private static UnicodeRange _{4};
";

    // Template for one test-case attribute.
    // {0} = property name, {1} = first code point, {2} = last code point.
    private const string _codePointFiltersTestsGeneratedFormat = @"[InlineData('\u{1}', '\u{2}', nameof(UnicodeRanges.{0}))]";

    /// <summary>
    /// Reads 'Blocks.txt' from the current directory and writes the two generated
    /// text files next to it.
    /// </summary>
    private static void Main()
    {
        // The input file should be Blocks.txt from the UCD corresponding to the
        // version of the Unicode spec we're consuming.
        // More info: http://www.unicode.org/reports/tr44/
        // Latest Blocks.txt: http://www.unicode.org/Public/UCD/latest/ucd/Blocks.txt

        StringBuilder runtimeCodeBuilder = new StringBuilder();
        StringBuilder testCodeBuilder = new StringBuilder();

        // We only care about lines of the form "XXXX..XXXX; Block name".
        // Comment lines, blank lines and blocks outside the BMP do not match.
        Regex regex = new Regex(@"^(?<startCode>[0-9A-F]{4})\.\.(?<endCode>[0-9A-F]{4}); (?<blockName>.+)$");

        foreach (string line in File.ReadAllLines("Blocks.txt"))
        {
            // Regex.Match never returns null; a failed match is reported via Success.
            Match match = regex.Match(line);
            if (!match.Success)
            {
                continue;
            }

            string startCode = match.Groups["startCode"].Value;
            string endCode = match.Groups["endCode"].Value;
            string blockName = match.Groups["blockName"].Value;
            string blockNameAsProperty = RemoveAllNonAlphanumeric(blockName);
            string blockNameAsField = WithDotNetFieldCasing(blockNameAsProperty);

            runtimeCodeBuilder.AppendFormat(CultureInfo.InvariantCulture, _codePointFiltersGeneratedFormat,
                blockName, startCode, endCode, blockNameAsProperty, blockNameAsField);

            testCodeBuilder.AppendFormat(CultureInfo.InvariantCulture, _codePointFiltersTestsGeneratedFormat,
                blockNameAsProperty, startCode, endCode);
            testCodeBuilder.AppendLine();
        }

        File.WriteAllText("UnicodeRanges.generated.txt", runtimeCodeBuilder.ToString());
        File.WriteAllText("UnicodeRangesTests.generated.txt", testCodeBuilder.ToString());
    }

    /// <summary>
    /// Strips every character that is not an ASCII letter or digit, turning a block
    /// name such as "Latin-1 Supplement" into "Latin1Supplement".
    /// </summary>
    /// <param name="blockName">The block name as it appears in Blocks.txt.</param>
    /// <returns>The block name containing only A-Z, a-z and 0-9.</returns>
    private static string RemoveAllNonAlphanumeric(string blockName)
    {
        // Allow only A-Z, a-z and 0-9.
        return new String(blockName.Where(IsAsciiLetterOrDigit).ToArray());
    }

    // True for the ASCII ranges A-Z, a-z and 0-9 only.
    private static bool IsAsciiLetterOrDigit(char c)
    {
        return ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || ('0' <= c && c <= '9');
    }

    /// <summary>
    /// Converts a PascalCase identifier to camelCase by lowercasing its leading run
    /// of non-lowercase characters, for example "BasicLatin" becomes "basicLatin"
    /// and "CJKUnifiedIdeographs" becomes "cjkUnifiedIdeographs".
    /// </summary>
    /// <param name="input">The identifier to convert.</param>
    /// <returns>The identifier with field-style casing.</returns>
    private static string WithDotNetFieldCasing(string input)
    {
        char[] chars = input.ToCharArray();
        for (int i = 0; i < chars.Length; i++)
        {
            if (Char.IsLower(chars[i]))
            {
                if (i > 1)
                {
                    // The character just before the first lowercase letter starts a new
                    // word, so restore its original casing, unless that character was at
                    // the front of the string.
                    chars[i - 1] = input[i - 1];
                }
                break;
            }

            chars[i] = Char.ToLowerInvariant(chars[i]);
        }
        return new String(chars);
    }
}
|
||||
}
|
||||
|
|
@ -1,36 +0,0 @@
|
|||
using System.Reflection;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
// Identity of the UnicodeTablesGenerator assembly.
[assembly: AssemblyTitle("UnicodeTablesGenerator")]
[assembly: AssemblyProduct("UnicodeTablesGenerator")]

// Descriptive metadata; intentionally left blank apart from the copyright line.
[assembly: AssemblyDescription("")]
[assembly: AssemblyConfiguration("")]
[assembly: AssemblyCompany("")]
[assembly: AssemblyCopyright("Copyright © 2015")]
[assembly: AssemblyTrademark("")]
[assembly: AssemblyCulture("")]

// Types in this assembly are hidden from COM by default. To expose a single type,
// apply ComVisible(true) to that type.
[assembly: ComVisible(false)]

// ID of the type library if this project is ever exposed to COM.
[assembly: Guid("c9286457-3d25-4143-9458-028aabedc4f5")]

// Version numbers have the form Major.Minor.Build.Revision.
// The Build and Revision parts can be defaulted with '*', for example:
// [assembly: AssemblyVersion("1.0.*")]
[assembly: AssemblyVersion("1.0.0.0")]
[assembly: AssemblyFileVersion("1.0.0.0")]
|
||||
|
|
@ -1,61 +0,0 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
|
||||
<PropertyGroup>
|
||||
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
|
||||
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
|
||||
<ProjectGuid>{3D181114-6946-4D34-A3B9-0F83B6B8FEAE}</ProjectGuid>
|
||||
<OutputType>Exe</OutputType>
|
||||
<AppDesignerFolder>Properties</AppDesignerFolder>
|
||||
<RootNamespace>UnicodeTablesGenerator</RootNamespace>
|
||||
<AssemblyName>UnicodeTablesGenerator</AssemblyName>
|
||||
<TargetFrameworkVersion>v4.5.1</TargetFrameworkVersion>
|
||||
<FileAlignment>512</FileAlignment>
|
||||
<AutoGenerateBindingRedirects>true</AutoGenerateBindingRedirects>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
|
||||
<PlatformTarget>AnyCPU</PlatformTarget>
|
||||
<DebugSymbols>true</DebugSymbols>
|
||||
<DebugType>full</DebugType>
|
||||
<Optimize>false</Optimize>
|
||||
<OutputPath>bin\Debug\</OutputPath>
|
||||
<DefineConstants>DEBUG;TRACE</DefineConstants>
|
||||
<ErrorReport>prompt</ErrorReport>
|
||||
<WarningLevel>4</WarningLevel>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
|
||||
<PlatformTarget>AnyCPU</PlatformTarget>
|
||||
<DebugType>pdbonly</DebugType>
|
||||
<Optimize>true</Optimize>
|
||||
<OutputPath>bin\Release\</OutputPath>
|
||||
<DefineConstants>TRACE</DefineConstants>
|
||||
<ErrorReport>prompt</ErrorReport>
|
||||
<WarningLevel>4</WarningLevel>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<Reference Include="System" />
|
||||
<Reference Include="System.Core" />
|
||||
<Reference Include="Microsoft.CSharp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Compile Include="Program.cs" />
|
||||
<Compile Include="Properties\AssemblyInfo.cs" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="App.config" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Content Include="..\..\Blocks.txt">
|
||||
<Link>Blocks.txt</Link>
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</Content>
|
||||
</ItemGroup>
|
||||
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
|
||||
<!-- To modify your build process, add your task inside one of the targets below and uncomment it.
|
||||
Other similar extension points exist, see Microsoft.Common.targets.
|
||||
<Target Name="BeforeBuild">
|
||||
</Target>
|
||||
<Target Name="AfterBuild">
|
||||
</Target>
|
||||
-->
|
||||
</Project>
|
||||
29215
unicode/UnicodeData.txt
29215
unicode/UnicodeData.txt
File diff suppressed because it is too large
Load Diff
|
|
@ -1,94 +0,0 @@
|
|||
This document contains instructions for updating the Unicode data set used by
|
||||
the WebEncoders project.
|
||||
|
||||
1) Download the latest UnicodeData.txt and Blocks.txt from the Unicode
|
||||
Consortium web site. These files are normally found under
|
||||
http://www.unicode.org/Public/X.Y.Z/ucd/, where X.Y.Z is the version of the
|
||||
Unicode specification of interest. Replace the UnicodeData.txt and
|
||||
Blocks.txt files in this folder with the files you downloaded.
|
||||
|
||||
2) Update unicode-copyright.txt in this folder with the following information:
|
||||
- The exact URLs where you downloaded UnicodeData.txt and Blocks.txt.
|
||||
- The date on which you downloaded these two files.
|
||||
- The Unicode copyright and permission notice, if it has changed. The latest
|
||||
copyright and permission notice can be found at the bottom of
|
||||
http://www.unicode.org/copyright.html.
|
||||
|
||||
3) Open the Generators solution and run the DefinedCharListGenerator project.
|
||||
Running this will drop a file unicode-defined-chars.bin into the output
|
||||
folder. Move this file into the following directory, overwriting the
|
||||
existing file in that directory:
|
||||
src\Microsoft.Extensions.WebEncoders.Core\compiler\resources
|
||||
|
||||
4) Open the Generators solution and run the UnicodeTablesGenerator project.
|
||||
Running this will drop two files UnicodeRanges.generated.txt and
|
||||
UnicodeRangesTests.generated.txt into the output folder.
|
||||
|
||||
5) Open UnicodeRanges.generated.txt in your favorite text editor. You'll see
|
||||
that the file contains all of the parsed Unicode block information in
|
||||
ascending code point order. Manually REMOVE the following blocks from this
|
||||
text file and re-save it.
|
||||
- High Surrogates (U+D800..U+DB7F)
|
||||
- High Private Use Surrogates (U+DB80..U+DBFF)
|
||||
- Low Surrogates (U+DC00..U+DFFF)
|
||||
- Private Use Area (U+E000..U+F8FF)
|
||||
|
||||
6) Open src\Microsoft.Extensions.WebEncoders.Core\UnicodeRanges.generated.cs in
|
||||
your IDE. Delete everything within the partial class definition and replace
|
||||
it with the contents of UnicodeRanges.generated.txt. (Remember to remove
|
||||
the blocks mentioned in the previous step, otherwise unit tests will fail.)
|
||||
|
||||
Open src\Microsoft.Extensions.WebEncoders.Core\UnicodeRanges.cs in your IDE.
|
||||
Update the doc comment at the top of the class to reflect the appropriate
|
||||
version of the Unicode specification.
|
||||
|
||||
7) Open UnicodeRangesTests.generated.txt in your favorite text editor. Just
|
||||
like in the previous .txt file, you'll need to remove the [InlineData]
|
||||
lines which map to the Unicode blocks which were manually removed.
|
||||
See step (5) for the list of which blocks must be removed. Then re-save
|
||||
this file.
|
||||
|
||||
8) Open test\Microsoft.Extensions.WebEncoders.Tests\UnicodeRangesTests.cs in
|
||||
your IDE. Delete all of the [InlineData] attributes on the Range_Unicode
|
||||
test, then paste in the contents of UnicodeRangesTests.generated.txt
|
||||
to create the new [InlineData] list.
|
||||
|
||||
IMPORTANT: Don't delete the [Theory] attribute on this method!
|
||||
|
||||
9) Open test\Microsoft.Extensions.WebEncoders.Tests\UnicodeHelpersTests.cs in
|
||||
your IDE. Scroll to the bottom of the ReadListOfDefinedCharacters method,
|
||||
and you'll see a section where the test special-cases CJK Ideographs and
|
||||
Hangul Syllables. As more characters are added to the Unicode specification
|
||||
the list of valid CJK Ideographs and Hangul Syllables can grow, so make sure
|
||||
these match up with the relevant lines in UnicodeData.txt. For instance, at
|
||||
the time of this writing UnicodeData.txt lists the valid Hangul Syllable
|
||||
character range as follows:
|
||||
|
||||
AC00;<Hangul Syllable, First>;Lo;0;L;;;;;N;;;;;
|
||||
D7A3;<Hangul Syllable, Last>;Lo;0;L;;;;;N;;;;;
|
||||
|
||||
If necessary, update the logic in the ReadListOfDefinedCharacters method to
|
||||
account for any changes to these lines in UnicodeData.txt.
|
||||
|
||||
That's it! Run the unit tests and everything should be good to go. If you find
|
||||
any stray comments throughout the code base that reference a specific version
|
||||
of the Unicode specification, go ahead and update them so that they correctly
|
||||
reflect the version you just submitted.
|
||||
|
||||
To recap, the files you should check in are:
|
||||
|
||||
src\Microsoft.Extensions.WebEncoders.Core\compiler\resources\
|
||||
unicode-defined-chars.bin
|
||||
|
||||
src\Microsoft.Extensions.WebEncoders.Core\
|
||||
UnicodeRanges.cs
|
||||
UnicodeRanges.generated.cs
|
||||
|
||||
test\Microsoft.Extensions.WebEncoders.Tests\
|
||||
UnicodeHelpersTests.cs (if necessary, see step 9)
|
||||
UnicodeRangesTests.cs
|
||||
|
||||
unicode\
|
||||
Blocks.txt
|
||||
unicode-copyright.txt
|
||||
UnicodeData.txt
|
||||
|
|
@ -1,47 +0,0 @@
|
|||
The files Blocks.txt and UnicodeData.txt in this directory were
|
||||
retrieved from the following URLs on Saturday, September 5, 2015.
|
||||
|
||||
http://www.unicode.org/Public/8.0.0/ucd/Blocks.txt
|
||||
http://www.unicode.org/Public/8.0.0/ucd/UnicodeData.txt
|
||||
|
||||
The below copyright notice applies to these files.
|
||||
|
||||
========================================================================
|
||||
|
||||
COPYRIGHT AND PERMISSION NOTICE
|
||||
|
||||
Copyright © 1991-2015 Unicode, Inc. All rights reserved.
|
||||
Distributed under the Terms of Use in
|
||||
http://www.unicode.org/copyright.html.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of the Unicode data files and any associated documentation
|
||||
(the "Data Files") or Unicode software and any associated documentation
|
||||
(the "Software") to deal in the Data Files or Software
|
||||
without restriction, including without limitation the rights to use,
|
||||
copy, modify, merge, publish, distribute, and/or sell copies of
|
||||
the Data Files or Software, and to permit persons to whom the Data Files
|
||||
or Software are furnished to do so, provided that
|
||||
(a) this copyright and permission notice appear with all copies
|
||||
of the Data Files or Software,
|
||||
(b) this copyright and permission notice appear in associated
|
||||
documentation, and
|
||||
(c) there is clear notice in each modified Data File or in the Software
|
||||
as well as in the documentation associated with the Data File(s) or
|
||||
Software that the data or software has been modified.
|
||||
|
||||
THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
|
||||
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
|
||||
WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT OF THIRD PARTY RIGHTS.
|
||||
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
|
||||
NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
|
||||
DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
|
||||
DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
|
||||
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
|
||||
PERFORMANCE OF THE DATA FILES OR SOFTWARE.
|
||||
|
||||
Except as contained in this notice, the name of a copyright holder
|
||||
shall not be used in advertising or otherwise to promote the sale,
|
||||
use or other dealings in these Data Files or Software without prior
|
||||
written authorization of the copyright holder.
|
||||
Loading…
Reference in New Issue