Doc comment cleanup, API refactorings

Rename CodePointFilters -> UnicodeBlocks
Rework allowed / disallowed code point APIs for ease of use
Move service registration APIs into WebEncoders project
This commit is contained in:
Levi B 2015-02-24 11:51:41 -08:00
parent 024c72b05a
commit c5dc9abff6
19 changed files with 2922 additions and 2671 deletions

View File

@ -3,9 +3,7 @@
"description": "ASP.NET 5 common extension methods for HTTP abstractions and IApplicationBuilder.",
"dependencies": {
"Microsoft.AspNet.Http": "1.0.0-*",
"Microsoft.Framework.WebEncoders": "1.0.0-*",
"Microsoft.Framework.DependencyInjection": "1.0.0-*",
"Microsoft.Framework.OptionsModel": "1.0.0-*",
"Microsoft.Net.Http.Headers": "1.0.0-*"
},
"frameworks" : {

View File

@ -0,0 +1,65 @@
// Copyright (c) Microsoft Open Technologies, Inc. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
using System;
using System.Diagnostics;
namespace Microsoft.Framework.WebEncoders
{
internal struct AllowedCharsBitmap
{
    // One bit per UTF-16 code unit (0x10000 of them), packed 32 to a uint.
    private const int BitmapWordCount = 0x10000 / (8 * sizeof(uint));

    // Bit (c & 0x1F) of word (c >> 5) is set when character c may be returned unencoded.
    private uint[] _allowedCharsBitmap;

    // Creates a bitmap in which every character starts out forbidden.
    public AllowedCharsBitmap()
    {
        _allowedCharsBitmap = new uint[BitmapWordCount];
    }

    // Flags the character as allowed (it can be returned unencoded).
    public void AllowCharacter(char c)
    {
        _allowedCharsBitmap[c >> 5] |= 1U << (c & 0x1F);
    }

    // Produces a copy backed by its own array, so changes to the copy
    // are not observed through this instance (and vice versa).
    public AllowedCharsBitmap Clone()
    {
        var copy = default(AllowedCharsBitmap);
        copy._allowedCharsBitmap = (uint[])_allowedCharsBitmap.Clone();
        return copy;
    }

    // Flags the character as forbidden (it must be returned encoded).
    public void ForbidCharacter(char c)
    {
        _allowedCharsBitmap[c >> 5] &= ~(1U << (c & 0x1F));
    }

    // Clears the 'allowed' bit of every code point which isn't mapped to a character
    // or which is otherwise always disallowed
    // (includes categories Cc, Cs, Co, Cn, Zs [except U+0020 SPACE], Zl, Zp).
    public void ForbidUndefinedCharacters()
    {
        uint[] defined = UnicodeHelpers.GetDefinedCharacterBitmap();
        uint[] allowed = _allowedCharsBitmap;
        Debug.Assert(defined.Length == allowed.Length);

        // Word-wise AND: a character stays allowed only if it is also defined.
        int word = 0;
        while (word < allowed.Length)
        {
            allowed[word] &= defined[word];
            word++;
        }
    }

    // Reports whether the character can be returned unencoded.
    public bool IsCharacterAllowed(char c)
    {
        uint mask = 1U << (c & 0x1F);
        return (_allowedCharsBitmap[c >> 5] & mask) != 0;
    }
}
}

View File

@ -0,0 +1,273 @@
// Copyright (c) Microsoft Open Technologies, Inc. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
using System;
using System.Collections.Generic;
using Microsoft.Framework.Internal;
namespace Microsoft.Framework.WebEncoders
{
/// <summary>
/// Represents a filter which allows only certain Unicode code points through.
/// </summary>
/// <remarks>
/// The allow list is tracked per <see cref="char"/>, so only code points in the
/// range U+0000..U+FFFF can be allowed or forbidden by this type.
/// </remarks>
public sealed class CodePointFilter : ICodePointFilter
{
    // One 'allowed' bit per char; all Allow* / Forbid* methods below mutate this bitmap.
    private AllowedCharsBitmap _allowedCharsBitmap;

    /// <summary>
    /// Instantiates the filter allowing only the 'Basic Latin' block of characters through.
    /// </summary>
    public CodePointFilter()
    {
        _allowedCharsBitmap = new AllowedCharsBitmap();
        AllowBlock(UnicodeBlocks.BasicLatin);
    }

    /// <summary>
    /// Instantiates the filter by cloning the allow list of another filter.
    /// </summary>
    /// <param name="other">The filter whose allowed code points are copied.</param>
    /// <exception cref="ArgumentNullException"><paramref name="other"/> is null.</exception>
    public CodePointFilter([NotNull] ICodePointFilter other)
    {
        if (other == null)
        {
            throw new ArgumentNullException(nameof(other));
        }

        CodePointFilter otherAsCodePointFilter = other as CodePointFilter;
        if (otherAsCodePointFilter != null)
        {
            // Fast path: copy the other filter's bitmap (GetAllowedCharsBitmap returns a clone).
            _allowedCharsBitmap = otherAsCodePointFilter.GetAllowedCharsBitmap();
        }
        else
        {
            // Slow path: start empty and replay the code points the other filter reports.
            _allowedCharsBitmap = new AllowedCharsBitmap();
            AllowFilter(other);
        }
    }

    /// <summary>
    /// Instantiates the filter where only the provided Unicode character blocks are
    /// allowed by the filter.
    /// </summary>
    /// <param name="allowedBlocks">
    /// The blocks whose characters are allowed. A null array allows nothing.
    /// </param>
    /// <exception cref="ArgumentNullException">An element of <paramref name="allowedBlocks"/> is null.</exception>
    public CodePointFilter(params UnicodeBlock[] allowedBlocks)
    {
        _allowedCharsBitmap = new AllowedCharsBitmap();
        AllowBlocks(allowedBlocks);
    }

    /// <summary>
    /// Allows all characters in the specified Unicode character block through the filter.
    /// </summary>
    /// <param name="block">The block whose characters become allowed.</param>
    /// <returns>
    /// The 'this' instance.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="block"/> is null.</exception>
    public CodePointFilter AllowBlock([NotNull] UnicodeBlock block)
    {
        if (block == null)
        {
            throw new ArgumentNullException(nameof(block));
        }

        int firstCodePoint = block.FirstCodePoint;
        int blockSize = block.BlockSize;
        for (int i = 0; i < blockSize; i++)
        {
            _allowedCharsBitmap.AllowCharacter((char)(firstCodePoint + i));
        }
        return this;
    }

    /// <summary>
    /// Allows all characters in the specified Unicode character blocks through the filter.
    /// </summary>
    /// <param name="blocks">The blocks to allow. A null array is treated as empty.</param>
    /// <returns>
    /// The 'this' instance.
    /// </returns>
    /// <exception cref="ArgumentNullException">An element of <paramref name="blocks"/> is null.</exception>
    public CodePointFilter AllowBlocks(params UnicodeBlock[] blocks)
    {
        if (blocks != null)
        {
            for (int i = 0; i < blocks.Length; i++)
            {
                AllowBlock(blocks[i]);
            }
        }
        return this;
    }

    /// <summary>
    /// Allows the specified character through the filter.
    /// </summary>
    /// <param name="c">The character to allow.</param>
    /// <returns>
    /// The 'this' instance.
    /// </returns>
    public CodePointFilter AllowChar(char c)
    {
        _allowedCharsBitmap.AllowCharacter(c);
        return this;
    }

    /// <summary>
    /// Allows the specified characters through the filter.
    /// </summary>
    /// <param name="chars">The characters to allow. A null array is treated as empty.</param>
    /// <returns>
    /// The 'this' instance.
    /// </returns>
    public CodePointFilter AllowChars(params char[] chars)
    {
        if (chars != null)
        {
            for (int i = 0; i < chars.Length; i++)
            {
                _allowedCharsBitmap.AllowCharacter(chars[i]);
            }
        }
        return this;
    }

    /// <summary>
    /// Allows all characters in the specified string through the filter.
    /// </summary>
    /// <param name="chars">A string whose individual characters become allowed.</param>
    /// <returns>
    /// The 'this' instance.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="chars"/> is null.</exception>
    public CodePointFilter AllowChars([NotNull] string chars)
    {
        if (chars == null)
        {
            throw new ArgumentNullException(nameof(chars));
        }

        for (int i = 0; i < chars.Length; i++)
        {
            _allowedCharsBitmap.AllowCharacter(chars[i]);
        }
        return this;
    }

    /// <summary>
    /// Allows all characters approved by the specified filter through this filter.
    /// </summary>
    /// <param name="filter">The filter whose allowed code points are merged into this one.</param>
    /// <returns>
    /// The 'this' instance.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="filter"/> is null.</exception>
    public CodePointFilter AllowFilter([NotNull] ICodePointFilter filter)
    {
        if (filter == null)
        {
            throw new ArgumentNullException(nameof(filter));
        }

        foreach (var allowedCodePoint in filter.GetAllowedCodePoints())
        {
            // If the code point can't be represented as a BMP character, skip it.
            // An explicit range test gives the same answer as a cast-and-compare,
            // but does not depend on the checked / unchecked compilation context.
            if (allowedCodePoint >= char.MinValue && allowedCodePoint <= char.MaxValue)
            {
                _allowedCharsBitmap.AllowCharacter((char)allowedCodePoint);
            }
        }
        return this;
    }

    /// <summary>
    /// Disallows all characters in the specified Unicode character block through the filter.
    /// </summary>
    /// <param name="block">The block whose characters become forbidden.</param>
    /// <returns>
    /// The 'this' instance.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="block"/> is null.</exception>
    public CodePointFilter ForbidBlock([NotNull] UnicodeBlock block)
    {
        if (block == null)
        {
            throw new ArgumentNullException(nameof(block));
        }

        int firstCodePoint = block.FirstCodePoint;
        int blockSize = block.BlockSize;
        for (int i = 0; i < blockSize; i++)
        {
            _allowedCharsBitmap.ForbidCharacter((char)(firstCodePoint + i));
        }
        return this;
    }

    /// <summary>
    /// Disallows all characters in the specified Unicode character blocks through the filter.
    /// </summary>
    /// <param name="blocks">The blocks to forbid. A null array is treated as empty.</param>
    /// <returns>
    /// The 'this' instance.
    /// </returns>
    /// <exception cref="ArgumentNullException">An element of <paramref name="blocks"/> is null.</exception>
    public CodePointFilter ForbidBlocks(params UnicodeBlock[] blocks)
    {
        if (blocks != null)
        {
            for (int i = 0; i < blocks.Length; i++)
            {
                ForbidBlock(blocks[i]);
            }
        }
        return this;
    }

    /// <summary>
    /// Disallows the specified character through the filter.
    /// </summary>
    /// <param name="c">The character to forbid.</param>
    /// <returns>
    /// The 'this' instance.
    /// </returns>
    public CodePointFilter ForbidChar(char c)
    {
        _allowedCharsBitmap.ForbidCharacter(c);
        return this;
    }

    /// <summary>
    /// Disallows the specified characters through the filter.
    /// </summary>
    /// <param name="chars">The characters to forbid. A null array is treated as empty.</param>
    /// <returns>
    /// The 'this' instance.
    /// </returns>
    public CodePointFilter ForbidChars(params char[] chars)
    {
        if (chars != null)
        {
            for (int i = 0; i < chars.Length; i++)
            {
                _allowedCharsBitmap.ForbidCharacter(chars[i]);
            }
        }
        return this;
    }

    /// <summary>
    /// Disallows all characters in the specified string through the filter.
    /// </summary>
    /// <param name="chars">A string whose individual characters become forbidden.</param>
    /// <returns>
    /// The 'this' instance.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="chars"/> is null.</exception>
    public CodePointFilter ForbidChars([NotNull] string chars)
    {
        if (chars == null)
        {
            throw new ArgumentNullException(nameof(chars));
        }

        for (int i = 0; i < chars.Length; i++)
        {
            _allowedCharsBitmap.ForbidCharacter(chars[i]);
        }
        return this;
    }

    /// <summary>
    /// Retrieves the bitmap of allowed characters from this filter.
    /// The returned bitmap is a clone of the original bitmap to avoid unintentional modification.
    /// </summary>
    internal AllowedCharsBitmap GetAllowedCharsBitmap()
    {
        return _allowedCharsBitmap.Clone();
    }

    /// <summary>
    /// Gets an enumeration of all allowed code points.
    /// </summary>
    /// <returns>
    /// The allowed code points in ascending order. The sequence is produced lazily,
    /// so it reflects the state of the filter at the time it is enumerated.
    /// </returns>
    public IEnumerable<int> GetAllowedCodePoints()
    {
        for (int i = 0; i < 0x10000; i++)
        {
            if (_allowedCharsBitmap.IsCharacterAllowed((char)i))
            {
                yield return i;
            }
        }
    }

    /// <summary>
    /// Returns a value stating whether the given character is allowed through the filter.
    /// </summary>
    /// <param name="c">The character to test.</param>
    public bool IsCharacterAllowed(char c)
    {
        return _allowedCharsBitmap.IsCharacterAllowed(c);
    }

    /// <summary>
    /// Wraps the provided filter as a CodePointFilter, avoiding the clone if possible.
    /// </summary>
    /// <param name="filter">The filter to wrap.</param>
    /// <returns>
    /// <paramref name="filter"/> itself when it already is a CodePointFilter;
    /// otherwise a new CodePointFilter populated from it.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="filter"/> is null.</exception>
    internal static CodePointFilter Wrap(ICodePointFilter filter)
    {
        if (filter == null)
        {
            throw new ArgumentNullException(nameof(filter));
        }

        return (filter as CodePointFilter) ?? new CodePointFilter(filter);
    }
}
}

File diff suppressed because it is too large Load Diff

View File

@ -3,7 +3,6 @@
using System;
using System.IO;
using Microsoft.Framework.Internal;
namespace Microsoft.Framework.WebEncoders
{
@ -19,8 +18,13 @@ namespace Microsoft.Framework.WebEncoders
/// The encoded value is also safe for inclusion inside an HTML attribute
/// as long as the attribute value is surrounded by single or double quotes.
/// </remarks>
public static void HtmlEncode([NotNull] this IHtmlEncoder htmlEncoder, string value, [NotNull] TextWriter output)
public static void HtmlEncode(this IHtmlEncoder htmlEncoder, string value, TextWriter output)
{
if (htmlEncoder == null)
{
throw new ArgumentNullException(nameof(htmlEncoder));
}
if (!String.IsNullOrEmpty(value))
{
htmlEncoder.HtmlEncode(value, 0, value.Length, output);
@ -30,8 +34,13 @@ namespace Microsoft.Framework.WebEncoders
/// <summary>
/// JavaScript-escapes a string and writes the result to the supplied output.
/// </summary>
public static void JavaScriptStringEncode([NotNull] this IJavaScriptStringEncoder javaScriptStringEncoder, string value, [NotNull] TextWriter output)
public static void JavaScriptStringEncode(this IJavaScriptStringEncoder javaScriptStringEncoder, string value, TextWriter output)
{
if (javaScriptStringEncoder == null)
{
throw new ArgumentNullException(nameof(javaScriptStringEncoder));
}
if (!String.IsNullOrEmpty(value))
{
javaScriptStringEncoder.JavaScriptStringEncode(value, 0, value.Length, output);
@ -45,8 +54,13 @@ namespace Microsoft.Framework.WebEncoders
/// The encoded value is safe for use in the segment, query, or
/// fragment portion of a URI.
/// </remarks>
public static void UrlEncode([NotNull] this IUrlEncoder urlEncoder, string value, [NotNull] TextWriter output)
public static void UrlEncode(this IUrlEncoder urlEncoder, string value, TextWriter output)
{
if (urlEncoder == null)
{
throw new ArgumentNullException(nameof(urlEncoder));
}
if (!String.IsNullOrEmpty(value))
{
urlEncoder.UrlEncode(value, 0, value.Length, output);

View File

@ -11,12 +11,11 @@ namespace Microsoft.Framework.WebEncoders
public sealed class EncoderOptions
{
/// <summary>
/// Specifies code point tables which do not require escaping by the encoders.
/// Specifies which code points are allowed to be represented unescaped by the encoders.
/// </summary>
/// <remarks>
/// If this property is set to a null array, then by default only the 'Basic Latin'
/// code point filter is active.
/// If this property is null, then the encoders will use their default allow lists.
/// </remarks>
public ICodePointFilter[] CodePointFilters { get; set; }
public ICodePointFilter CodePointFilter { get; set; }
}
}

View File

@ -2,9 +2,9 @@
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
using System;
using Microsoft.AspNet.Http;
using Microsoft.Framework.WebEncoders;
using Microsoft.Framework.ConfigurationModel;
using Microsoft.Framework.Internal;
using Microsoft.Framework.WebEncoders;
namespace Microsoft.Framework.DependencyInjection
{

View File

@ -2,15 +2,15 @@
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
using System;
using Microsoft.AspNet.Http;
using Microsoft.Framework.DependencyInjection;
using Microsoft.Framework.Internal;
namespace Microsoft.Framework.WebEncoders
{
/// <summary>
/// Contains extension methods for fetching encoders from a service provider.
/// </summary>
public static class EncoderExtensions
public static class EncoderServiceProviderExtensions
{
/// <summary>
/// Retrieves an IHtmlEncoder from a service provider.

View File

@ -22,17 +22,17 @@ namespace Microsoft.Framework.WebEncoders
// Register the default encoders
// We want to call the 'Default' property getters lazily since they perform static caching
yield return describe.Singleton<IHtmlEncoder>(CreateFactory(() => HtmlEncoder.Default, filters => new HtmlEncoder(filters)));
yield return describe.Singleton<IJavaScriptStringEncoder>(CreateFactory(() => JavaScriptStringEncoder.Default, filters => new JavaScriptStringEncoder(filters)));
yield return describe.Singleton<IUrlEncoder>(CreateFactory(() => UrlEncoder.Default, filters => new UrlEncoder(filters)));
yield return describe.Singleton<IHtmlEncoder>(CreateFactory(() => HtmlEncoder.Default, filter => new HtmlEncoder(filter)));
yield return describe.Singleton<IJavaScriptStringEncoder>(CreateFactory(() => JavaScriptStringEncoder.Default, filter => new JavaScriptStringEncoder(filter)));
yield return describe.Singleton<IUrlEncoder>(CreateFactory(() => UrlEncoder.Default, filter => new UrlEncoder(filter)));
}
private static Func<IServiceProvider, T> CreateFactory<T>(Func<T> parameterlessCtor, Func<ICodePointFilter[], T> parameterfulCtor)
private static Func<IServiceProvider, T> CreateFactory<T>(Func<T> defaultFactory, Func<ICodePointFilter, T> customFilterFactory)
{
return serviceProvider =>
{
var codePointFilters = serviceProvider?.GetService<IOptions<EncoderOptions>>()?.Options?.CodePointFilters;
return (codePointFilters != null) ? parameterfulCtor(codePointFilters) : parameterlessCtor();
var codePointFilter = serviceProvider?.GetService<IOptions<EncoderOptions>>()?.Options?.CodePointFilter;
return (codePointFilter != null) ? customFilterFactory(codePointFilter) : defaultFactory();
};
}
}

View File

@ -13,6 +13,8 @@ namespace Microsoft.Framework.WebEncoders
/// can be represented unencoded.
/// </summary>
/// <remarks>
/// Instances of this type will always encode a certain set of characters (such as &lt;
/// and &gt;), even if the filter provided in the constructor allows such characters.
/// Once constructed, instances of this class are thread-safe for multiple callers.
/// </remarks>
public unsafe sealed class HtmlEncoder : IHtmlEncoder
@ -32,10 +34,19 @@ namespace Microsoft.Framework.WebEncoders
}
/// <summary>
/// Instantiates an encoder using a custom allow list of characters.
/// Instantiates an encoder specifying which Unicode character blocks are allowed to
/// pass through the encoder unescaped.
/// </summary>
public HtmlEncoder(params ICodePointFilter[] filters)
: this(new HtmlUnicodeEncoder(filters))
public HtmlEncoder(params UnicodeBlock[] allowedBlocks)
: this(new HtmlUnicodeEncoder(new CodePointFilter(allowedBlocks)))
{
}
/// <summary>
/// Instantiates an encoder using a custom code point filter.
/// </summary>
public HtmlEncoder(ICodePointFilter filter)
: this(new HtmlUnicodeEncoder(CodePointFilter.Wrap(filter)))
{
}
@ -97,8 +108,8 @@ namespace Microsoft.Framework.WebEncoders
// generate at most 10 output chars ("&#x10FFFF;"), which equates to 5 output chars per input char.
private const int MaxOutputCharsPerInputChar = 8;
internal HtmlUnicodeEncoder(ICodePointFilter[] filters)
: base(filters, MaxOutputCharsPerInputChar)
internal HtmlUnicodeEncoder(CodePointFilter filter)
: base(filter, MaxOutputCharsPerInputChar)
{
}
@ -109,7 +120,7 @@ namespace Microsoft.Framework.WebEncoders
HtmlUnicodeEncoder encoder = Volatile.Read(ref _basicLatinSingleton);
if (encoder == null)
{
encoder = new HtmlUnicodeEncoder(new[] { CodePointFilters.BasicLatin });
encoder = new HtmlUnicodeEncoder(new CodePointFilter());
Volatile.Write(ref _basicLatinSingleton, encoder);
}
return encoder;

View File

@ -1,5 +1,4 @@
using System;
// Copyright (c) Microsoft Open Technologies, Inc. All rights reserved.
// Copyright (c) Microsoft Open Technologies, Inc. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
using System;

View File

@ -16,6 +16,9 @@ namespace Microsoft.Framework.WebEncoders
/// JavaScript-escapes a character array and writes the result to the
/// supplied output.
/// </summary>
/// <remarks>
/// The encoded value is appropriately encoded for inclusion inside a quoted JSON string.
/// </remarks>
void JavaScriptStringEncode([NotNull] char[] value, int startIndex, int charCount, [NotNull] TextWriter output);
/// <summary>
@ -23,6 +26,7 @@ namespace Microsoft.Framework.WebEncoders
/// </summary>
/// <returns>
/// The JavaScript-escaped value, or null if the input string was null.
/// The encoded value is appropriately encoded for inclusion inside a quoted JSON string.
/// </returns>
string JavaScriptStringEncode(string value);
@ -30,6 +34,9 @@ namespace Microsoft.Framework.WebEncoders
/// JavaScript-escapes a given input string and writes the
/// result to the supplied output.
/// </summary>
/// <remarks>
/// The encoded value is appropriately encoded for inclusion inside a quoted JSON string.
/// </remarks>
void JavaScriptStringEncode([NotNull] string value, int startIndex, int charCount, [NotNull] TextWriter output);
}
}

View File

@ -13,6 +13,8 @@ namespace Microsoft.Framework.WebEncoders
/// can be represented unescaped.
/// </summary>
/// <remarks>
/// Instances of this type will always encode a certain set of characters (such as '
/// and "), even if the filter provided in the constructor allows such characters.
/// Once constructed, instances of this class are thread-safe for multiple callers.
/// </remarks>
public sealed class JavaScriptStringEncoder : IJavaScriptStringEncoder
@ -32,10 +34,19 @@ namespace Microsoft.Framework.WebEncoders
}
/// <summary>
/// Instantiates an encoder using a custom allow list of characters.
/// Instantiates an encoder specifying which Unicode character blocks are allowed to
/// pass through the encoder unescaped.
/// </summary>
public JavaScriptStringEncoder(params ICodePointFilter[] filters)
: this(new JavaScriptStringUnicodeEncoder(filters))
public JavaScriptStringEncoder(params UnicodeBlock[] allowedBlocks)
: this(new JavaScriptStringUnicodeEncoder(new CodePointFilter(allowedBlocks)))
{
}
/// <summary>
/// Instantiates an encoder using a custom code point filter.
/// </summary>
public JavaScriptStringEncoder(ICodePointFilter filter)
: this(new JavaScriptStringUnicodeEncoder(CodePointFilter.Wrap(filter)))
{
}
@ -97,8 +108,8 @@ namespace Microsoft.Framework.WebEncoders
// surrogate pairs in the output.
private const int MaxOutputCharsPerInputChar = 6;
internal JavaScriptStringUnicodeEncoder(ICodePointFilter[] filters)
: base(filters, MaxOutputCharsPerInputChar)
internal JavaScriptStringUnicodeEncoder(CodePointFilter filter)
: base(filter, MaxOutputCharsPerInputChar)
{
// The only interesting characters above and beyond what the base encoder
// already covers are the solidus and reverse solidus.
@ -113,7 +124,7 @@ namespace Microsoft.Framework.WebEncoders
JavaScriptStringUnicodeEncoder encoder = Volatile.Read(ref _basicLatinSingleton);
if (encoder == null)
{
encoder = new JavaScriptStringUnicodeEncoder(new[] { CodePointFilters.BasicLatin });
encoder = new JavaScriptStringUnicodeEncoder(new CodePointFilter());
Volatile.Write(ref _basicLatinSingleton, encoder);
}
return encoder;

View File

@ -0,0 +1,66 @@
// Copyright (c) Microsoft Open Technologies, Inc. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
using System;
namespace Microsoft.Framework.WebEncoders
{
/// <summary>
/// Describes a contiguous range of Unicode code points.
/// </summary>
/// <remarks>
/// Currently only the Basic Multilingual Plane is supported.
/// </remarks>
public sealed class UnicodeBlock
{
    /// <summary>
    /// Creates a new representation of a Unicode block given the first code point
    /// in the block and the number of code points in the block.
    /// </summary>
    /// <param name="firstCodePoint">
    /// The first code point of the block; must lie in U+0000..U+FFFF and have the form U+nnn0.
    /// </param>
    /// <param name="blockSize">
    /// The number of code points in the block; must be a non-negative multiple of 16
    /// and must not extend the block beyond U+FFFF.
    /// </param>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="firstCodePoint"/> or <paramref name="blockSize"/> violates the rules above.
    /// </exception>
    public UnicodeBlock(int firstCodePoint, int blockSize)
    {
        // Blocks start on a 16-code-point boundary, contain a multiple of
        // 16 code points, and never span planes.
        // See http://unicode.org/faq/blocks_ranges.html for more info.
        bool startIsInBmp = firstCodePoint >= 0 && firstCodePoint <= 0xFFFF;
        bool startIsAligned = (firstCodePoint & 0xF) == 0;
        if (!(startIsInBmp && startIsAligned))
        {
            throw new ArgumentOutOfRangeException(nameof(firstCodePoint));
        }

        // The end is computed in 64-bit arithmetic so a huge size cannot wrap around.
        long endExclusive = (long)firstCodePoint + blockSize;
        bool sizeIsValid = blockSize >= 0 && (blockSize % 16) == 0 && endExclusive <= 0x10000;
        if (!sizeIsValid)
        {
            throw new ArgumentOutOfRangeException(nameof(blockSize));
        }

        FirstCodePoint = firstCodePoint;
        BlockSize = blockSize;
    }

    /// <summary>
    /// The number of code points in this block.
    /// </summary>
    public int BlockSize { get; }

    /// <summary>
    /// The first code point in this block.
    /// </summary>
    public int FirstCodePoint { get; }

    /// <summary>
    /// Creates a block spanning the inclusive character range
    /// <paramref name="firstChar"/>..<paramref name="lastChar"/>.
    /// </summary>
    /// <param name="firstChar">The first character of the block; must have the form U+nnn0.</param>
    /// <param name="lastChar">
    /// The last character of the block; must have the form U+nnnF and must not
    /// precede <paramref name="firstChar"/>.
    /// </param>
    /// <returns>A block covering every character from the first through the last.</returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="firstChar"/> or <paramref name="lastChar"/> violates the rules above.
    /// </exception>
    public static UnicodeBlock FromCharacterRange(char firstChar, char lastChar)
    {
        // No plane check is needed here: a 'char' can only hold
        // Basic Multilingual Plane values.
        // See http://unicode.org/faq/blocks_ranges.html for more info.
        int startNibble = firstChar & 0xF;
        if (startNibble != 0)
        {
            throw new ArgumentOutOfRangeException(nameof(firstChar));
        }

        int endNibble = lastChar & 0xF;
        bool endIsValid = lastChar >= firstChar && endNibble == 0xF;
        if (!endIsValid)
        {
            throw new ArgumentOutOfRangeException(nameof(lastChar));
        }

        // Both ends are inclusive, hence the +1.
        int codePointCount = (lastChar - firstChar) + 1;
        return new UnicodeBlock(firstChar, codePointCount);
    }
}
}

View File

@ -0,0 +1,64 @@
// Copyright (c) Microsoft Open Technologies, Inc. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
using System;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Threading;
namespace Microsoft.Framework.WebEncoders
{
/// <summary>
/// Contains predefined Unicode blocks.
/// </summary>
public static partial class UnicodeBlocks
{
    // Backing fields for the lazily-created blocks below; null until first use.
    private static UnicodeBlock _none;
    private static UnicodeBlock _all;

    /// <summary>
    /// Represents an empty Unicode block.
    /// </summary>
    /// <remarks>
    /// This block contains no code points.
    /// </remarks>
    public static UnicodeBlock None
    {
        get
        {
            UnicodeBlock cached = Volatile.Read(ref _none);
            if (cached != null)
            {
                return cached;
            }
            return CreateEmptyBlock(ref _none);
        }
    }

    /// <summary>
    /// Represents a block containing all characters in the Unicode Basic Multilingual Plane (U+0000..U+FFFF).
    /// </summary>
    public static UnicodeBlock All
    {
        get
        {
            UnicodeBlock cached = Volatile.Read(ref _all);
            if (cached != null)
            {
                return cached;
            }
            return CreateBlock(ref _all, '\u0000', '\uFFFF');
        }
    }

    // Slow path of a block property: builds the block for [first, last], publishes it
    // into the caller's backing field and returns the instance that was just built.
    [MethodImpl(MethodImplOptions.NoInlining)] // the caller should be inlined, not this method
    private static UnicodeBlock CreateBlock(ref UnicodeBlock block, char first, char last)
    {
        // Two threads may race here; either one's write winning is acceptable.
        Debug.Assert(last > first, "Code points were specified out of order.");
        UnicodeBlock created = UnicodeBlock.FromCharacterRange(first, last);
        Volatile.Write(ref block, created);
        return created;
    }

    // Slow path of the 'None' property: builds a zero-length block, publishes it
    // into the caller's backing field and returns the instance that was just built.
    [MethodImpl(MethodImplOptions.NoInlining)] // the caller should be inlined, not this method
    private static UnicodeBlock CreateEmptyBlock(ref UnicodeBlock block)
    {
        // Two threads may race here; either one's write winning is acceptable.
        UnicodeBlock created = new UnicodeBlock(0, 0);
        Volatile.Write(ref block, created);
        return created;
    }
}
}

File diff suppressed because it is too large Load Diff

View File

@ -6,14 +6,13 @@ using System.Diagnostics;
using System.IO;
using System.Runtime.CompilerServices;
using System.Text;
using Microsoft.Framework.Internal;
namespace Microsoft.Framework.WebEncoders
{
internal unsafe abstract class UnicodeEncoderBase
{
// A bitmap of characters which are allowed to be returned unescaped.
private readonly uint[] _allowedCharsBitmap = new uint[0x10000 / 32];
private AllowedCharsBitmap _allowedCharsBitmap;
// The worst-case number of output chars generated for any input char.
private readonly int _maxOutputCharsPerInputChar;
@ -21,25 +20,10 @@ namespace Microsoft.Framework.WebEncoders
/// <summary>
/// Instantiates an encoder using a custom allow list of characters.
/// </summary>
protected UnicodeEncoderBase(ICodePointFilter[] filters, int maxOutputCharsPerInputChar)
protected UnicodeEncoderBase(CodePointFilter filter, int maxOutputCharsPerInputChar)
{
_maxOutputCharsPerInputChar = maxOutputCharsPerInputChar;
if (filters != null)
{
// Punch a hole for each allowed code point across all filters (this is an OR).
// We don't allow supplementary (astral) characters for now.
foreach (var filter in filters)
{
foreach (var codePoint in filter.GetAllowedCodePoints())
{
if (!UnicodeHelpers.IsSupplementaryCodePoint(codePoint))
{
AllowCharacter((char)codePoint);
}
}
}
}
_allowedCharsBitmap = filter.GetAllowedCharsBitmap();
// Forbid characters that are special in HTML.
// Even though this is a common encoder used by everybody (including URL
@ -55,38 +39,29 @@ namespace Microsoft.Framework.WebEncoders
// Forbid codepoints which aren't mapped to characters or which are otherwise always disallowed
// (includes categories Cc, Cs, Co, Cn, Zs [except U+0020 SPACE], Zl, Zp)
uint[] definedCharactersBitmap = UnicodeHelpers.GetDefinedCharacterBitmap();
Debug.Assert(definedCharactersBitmap.Length == _allowedCharsBitmap.Length);
for (int i = 0; i < _allowedCharsBitmap.Length; i++)
{
_allowedCharsBitmap[i] &= definedCharactersBitmap[i];
}
}
// Marks a character as allowed (can be returned unencoded)
private void AllowCharacter(char c)
{
uint codePoint = (uint)c;
int index = (int)(codePoint >> 5);
int offset = (int)(codePoint & 0x1FU);
_allowedCharsBitmap[index] |= 0x1U << offset;
_allowedCharsBitmap.ForbidUndefinedCharacters();
}
// Marks a character as forbidden (must be returned encoded)
protected void ForbidCharacter(char c)
{
uint codePoint = (uint)c;
int index = (int)(codePoint >> 5);
int offset = (int)(codePoint & 0x1FU);
_allowedCharsBitmap[index] &= ~(0x1U << offset);
_allowedCharsBitmap.ForbidCharacter(c);
}
/// <summary>
/// Entry point to the encoder.
/// </summary>
public void Encode([NotNull] char[] value, int startIndex, int charCount, [NotNull] TextWriter output)
public void Encode(char[] value, int startIndex, int charCount, TextWriter output)
{
// Input checking
if (value == null)
{
throw new ArgumentNullException(nameof(value));
}
if (output == null)
{
throw new ArgumentNullException(nameof(output));
}
ValidateInputs(startIndex, charCount, actualInputLength: value.Length);
if (charCount != 0)
@ -137,9 +112,17 @@ namespace Microsoft.Framework.WebEncoders
/// <summary>
/// Entry point to the encoder.
/// </summary>
public void Encode([NotNull] string value, int startIndex, int charCount, [NotNull] TextWriter output)
public void Encode(string value, int startIndex, int charCount, TextWriter output)
{
// Input checking
if (value == null)
{
throw new ArgumentNullException(nameof(value));
}
if (output == null)
{
throw new ArgumentNullException(nameof(output));
}
ValidateInputs(startIndex, charCount, actualInputLength: value.Length);
if (charCount != 0)
@ -249,10 +232,7 @@ namespace Microsoft.Framework.WebEncoders
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private bool IsCharacterAllowed(char c)
{
uint codePoint = (uint)c;
int index = (int)(codePoint >> 5);
int offset = (int)(codePoint & 0x1FU);
return ((_allowedCharsBitmap[index] >> offset) & 0x1U) != 0;
return _allowedCharsBitmap.IsCharacterAllowed(c);
}
private static void ValidateInputs(int startIndex, int charCount, int actualInputLength)

View File

@ -13,6 +13,8 @@ namespace Microsoft.Framework.WebEncoders
/// can be represented unescaped.
/// </summary>
/// <remarks>
/// Instances of this type will always encode a certain set of characters (such as +
/// and ?), even if the filter provided in the constructor allows such characters.
/// Once constructed, instances of this class are thread-safe for multiple callers.
/// </remarks>
public sealed class UrlEncoder : IUrlEncoder
@ -32,10 +34,19 @@ namespace Microsoft.Framework.WebEncoders
}
/// <summary>
/// Instantiates an encoder using a custom allow list of characters.
/// Instantiates an encoder specifying which Unicode character blocks are allowed to
/// pass through the encoder unescaped.
/// </summary>
public UrlEncoder(params ICodePointFilter[] filters)
: this(new UrlUnicodeEncoder(filters))
public UrlEncoder(params UnicodeBlock[] allowedBlocks)
: this(new UrlUnicodeEncoder(new CodePointFilter(allowedBlocks)))
{
}
/// <summary>
/// Instantiates an encoder using a custom code point filter.
/// </summary>
public UrlEncoder(ICodePointFilter filter)
: this(new UrlUnicodeEncoder(CodePointFilter.Wrap(filter)))
{
}
@ -98,8 +109,8 @@ namespace Microsoft.Framework.WebEncoders
// chars to produce 12 output chars "%XX%YY%ZZ%WW", which is 6 output chars per input char.
private const int MaxOutputCharsPerInputChar = 9;
internal UrlUnicodeEncoder(ICodePointFilter[] filters)
: base(filters, MaxOutputCharsPerInputChar)
internal UrlUnicodeEncoder(CodePointFilter filter)
: base(filter, MaxOutputCharsPerInputChar)
{
// Per RFC 3987, Sec. 2.2, we want encodings that are safe for
// 'isegment', 'iquery', and 'ifragment'. The only thing these
@ -152,7 +163,7 @@ namespace Microsoft.Framework.WebEncoders
UrlUnicodeEncoder encoder = Volatile.Read(ref _basicLatinSingleton);
if (encoder == null)
{
encoder = new UrlUnicodeEncoder(new[] { CodePointFilters.BasicLatin });
encoder = new UrlUnicodeEncoder(new CodePointFilter());
Volatile.Write(ref _basicLatinSingleton, encoder);
}
return encoder;

View File

@ -5,6 +5,9 @@
"allowUnsafe": true
},
"dependencies": {
"Microsoft.Framework.ConfigurationModel": "1.0.0-*",
"Microsoft.Framework.DependencyInjection": "1.0.0-*",
"Microsoft.Framework.OptionsModel": "1.0.0-*",
"Microsoft.Framework.NotNullAttribute.Internal": { "type": "build", "version": "1.0.0-*" }
},
"frameworks": {