API cleanup: Rename UnicodeBlock -> UnicodeRange

Also clean up related doc comments
This commit is contained in:
Levi B 2015-03-07 13:29:03 -08:00
parent ed380ef61c
commit 64077026c7
21 changed files with 2083 additions and 3055 deletions

View File

@ -15,7 +15,7 @@ namespace Microsoft.Framework.WebEncoders
private AllowedCharsBitmap _allowedCharsBitmap;
/// <summary>
/// Instantiates an empty filter.
/// Instantiates an empty filter (allows no code points through by default).
/// </summary>
public CodePointFilter()
{
@ -23,7 +23,7 @@ namespace Microsoft.Framework.WebEncoders
}
/// <summary>
/// Instantiates the filter by cloning the allow list of another filter.
/// Instantiates the filter by cloning the allow list of another <see cref="ICodePointFilter"/>.
/// </summary>
public CodePointFilter([NotNull] ICodePointFilter other)
{
@ -40,53 +40,17 @@ namespace Microsoft.Framework.WebEncoders
}
/// <summary>
/// Instantiates the filter where only the provided Unicode character blocks are
/// allowed by the filter.
/// Instantiates the filter where only the character ranges specified by <paramref name="allowedRanges"/>
/// are allowed by the filter.
/// </summary>
/// <param name="allowedBlocks"></param>
public CodePointFilter(params UnicodeBlock[] allowedBlocks)
public CodePointFilter(params UnicodeRange[] allowedRanges)
{
_allowedCharsBitmap = new AllowedCharsBitmap();
AllowBlocks(allowedBlocks);
AllowRanges(allowedRanges);
}
/// <summary>
/// Marks every code point contained in <paramref name="block"/> as allowed by this filter.
/// </summary>
/// <param name="block">The Unicode block whose code points should be allowed.</param>
/// <returns>
/// The 'this' instance.
/// </returns>
public CodePointFilter AllowBlock([NotNull] UnicodeBlock block)
{
    // The block covers the half-open interval [start, end).
    int start = block.FirstCodePoint;
    int end = start + block.BlockSize;
    for (int codePoint = start; codePoint < end; codePoint++)
    {
        _allowedCharsBitmap.AllowCharacter((char)codePoint);
    }
    return this;
}
/// <summary>
/// Marks every code point contained in any of the given <paramref name="blocks"/> as allowed by this filter.
/// </summary>
/// <param name="blocks">The Unicode blocks to allow. A null array is treated as empty.</param>
/// <returns>
/// The 'this' instance.
/// </returns>
public CodePointFilter AllowBlocks(params UnicodeBlock[] blocks)
{
    // Nothing to do when no array was supplied.
    if (blocks == null)
    {
        return this;
    }

    foreach (UnicodeBlock block in blocks)
    {
        AllowBlock(block);
    }
    return this;
}
/// <summary>
/// Allows the specified character through the filter.
/// Allows the character specified by <paramref name="c"/> through the filter.
/// </summary>
/// <returns>
/// The 'this' instance.
@ -98,7 +62,7 @@ namespace Microsoft.Framework.WebEncoders
}
/// <summary>
/// Allows the specified characters through the filter.
/// Allows all characters specified by <paramref name="chars"/> through the filter.
/// </summary>
/// <returns>
/// The 'this' instance.
@ -116,7 +80,7 @@ namespace Microsoft.Framework.WebEncoders
}
/// <summary>
/// Allows all characters in the specified string through the filter.
/// Allows all characters in the string <paramref name="chars"/> through the filter.
/// </summary>
/// <returns>
/// The 'this' instance.
@ -131,7 +95,7 @@ namespace Microsoft.Framework.WebEncoders
}
/// <summary>
/// Allows all characters approved by the specified filter through this filter.
/// Allows all characters specified by <paramref name="filter"/> through the filter.
/// </summary>
/// <returns>
/// The 'this' instance.
@ -151,7 +115,42 @@ namespace Microsoft.Framework.WebEncoders
}
/// <summary>
/// Disallows all characters through the filter.
/// Allows all characters specified by <paramref name="range"/> through the filter.
/// </summary>
/// <returns>
/// The 'this' instance.
/// </returns>
public CodePointFilter AllowRange([NotNull] UnicodeRange range)
{
    // Start at the first code point of the range and allow RangeSize
    // consecutive code points, in ascending order.
    int current = range.FirstCodePoint;
    int remaining = range.RangeSize;
    while (remaining > 0)
    {
        _allowedCharsBitmap.AllowCharacter((char)current);
        current++;
        remaining--;
    }

    // Return this instance so calls can be chained.
    return this;
}
/// <summary>
/// Allows every character covered by any of the given <paramref name="ranges"/> through the filter.
/// </summary>
/// <param name="ranges">The ranges to allow. A null array is treated as empty.</param>
/// <returns>
/// The 'this' instance.
/// </returns>
public CodePointFilter AllowRanges(params UnicodeRange[] ranges)
{
    // A missing array contributes zero ranges.
    int count = (ranges == null) ? 0 : ranges.Length;
    for (int index = 0; index < count; index++)
    {
        AllowRange(ranges[index]);
    }
    return this;
}
/// <summary>
/// Resets this filter by disallowing all characters.
/// </summary>
/// <returns>
/// The 'this' instance.
@ -163,42 +162,7 @@ namespace Microsoft.Framework.WebEncoders
}
/// <summary>
/// Marks every code point contained in <paramref name="block"/> as forbidden by this filter.
/// </summary>
/// <param name="block">The Unicode block whose code points should be forbidden.</param>
/// <returns>
/// The 'this' instance.
/// </returns>
public CodePointFilter ForbidBlock([NotNull] UnicodeBlock block)
{
    // Start at the first code point of the block and forbid BlockSize
    // consecutive code points, in ascending order.
    int current = block.FirstCodePoint;
    int remaining = block.BlockSize;
    while (remaining > 0)
    {
        _allowedCharsBitmap.ForbidCharacter((char)current);
        current++;
        remaining--;
    }
    return this;
}
/// <summary>
/// Marks every code point contained in any of the given <paramref name="blocks"/> as forbidden by this filter.
/// </summary>
/// <param name="blocks">The Unicode blocks to forbid. A null array is treated as empty.</param>
/// <returns>
/// The 'this' instance.
/// </returns>
public CodePointFilter ForbidBlocks(params UnicodeBlock[] blocks)
{
    // A missing array contributes zero blocks.
    int count = (blocks == null) ? 0 : blocks.Length;
    for (int index = 0; index < count; index++)
    {
        ForbidBlock(blocks[index]);
    }
    return this;
}
/// <summary>
/// Disallows the specified character through the filter.
/// Disallows the character <paramref name="c"/> through the filter.
/// </summary>
/// <returns>
/// The 'this' instance.
@ -210,7 +174,7 @@ namespace Microsoft.Framework.WebEncoders
}
/// <summary>
/// Disallows the specified characters through the filter.
/// Disallows all characters specified by <paramref name="chars"/> through the filter.
/// </summary>
/// <returns>
/// The 'this' instance.
@ -228,7 +192,7 @@ namespace Microsoft.Framework.WebEncoders
}
/// <summary>
/// Disallows all characters in the specified string through the filter.
/// Disallows all characters in the string <paramref name="chars"/> through the filter.
/// </summary>
/// <returns>
/// The 'this' instance.
@ -242,6 +206,41 @@ namespace Microsoft.Framework.WebEncoders
return this;
}
/// <summary>
/// Forbids every character covered by <paramref name="range"/> from passing through the filter.
/// </summary>
/// <param name="range">The range whose code points should be forbidden.</param>
/// <returns>
/// The 'this' instance.
/// </returns>
public CodePointFilter ForbidRange([NotNull] UnicodeRange range)
{
    // The range covers the half-open interval [start, end).
    int start = range.FirstCodePoint;
    int end = start + range.RangeSize;
    for (int codePoint = start; codePoint < end; codePoint++)
    {
        _allowedCharsBitmap.ForbidCharacter((char)codePoint);
    }
    return this;
}
/// <summary>
/// Forbids every character covered by any of the given <paramref name="ranges"/> from passing through the filter.
/// </summary>
/// <param name="ranges">The ranges to forbid. A null array is treated as empty.</param>
/// <returns>
/// The 'this' instance.
/// </returns>
public CodePointFilter ForbidRanges(params UnicodeRange[] ranges)
{
    // Nothing to do when no array was supplied.
    if (ranges == null)
    {
        return this;
    }

    foreach (UnicodeRange range in ranges)
    {
        ForbidRange(range);
    }
    return this;
}
/// <summary>
/// Retrieves the bitmap of allowed characters from this filter.
/// The returned bitmap is a clone of the original bitmap to avoid unintentional modification.
@ -266,13 +265,13 @@ namespace Microsoft.Framework.WebEncoders
}
/// <summary>
/// Returns a value stating whether the given character is allowed through the filter.
/// Returns a value stating whether the character <paramref name="c"/> is allowed through the filter.
/// </summary>
public bool IsCharacterAllowed(char c)
{
    // The answer comes straight from the underlying allowed-characters bitmap.
    bool isAllowed = _allowedCharsBitmap.IsCharacterAllowed(c);
    return isAllowed;
}
/// <summary>
/// Wraps the provided filter as a CodePointFilter, avoiding the clone if possible.
/// </summary>

View File

@ -26,7 +26,7 @@ namespace Microsoft.Framework.WebEncoders
private readonly HtmlUnicodeEncoder _innerUnicodeEncoder;
/// <summary>
/// Instantiates an encoder using the 'Basic Latin' code table as the allow list.
/// Instantiates an encoder using <see cref="UnicodeRanges.BasicLatin"/> as its allow list.
/// </summary>
public HtmlEncoder()
: this(HtmlUnicodeEncoder.BasicLatin)
@ -34,11 +34,11 @@ namespace Microsoft.Framework.WebEncoders
}
/// <summary>
/// Instantiates an encoder specifying which Unicode character blocks are allowed to
/// Instantiates an encoder specifying which Unicode character ranges are allowed to
/// pass through the encoder unescaped.
/// </summary>
public HtmlEncoder(params UnicodeBlock[] allowedBlocks)
: this(new HtmlUnicodeEncoder(new CodePointFilter(allowedBlocks)))
public HtmlEncoder(params UnicodeRange[] allowedRanges)
: this(new HtmlUnicodeEncoder(new CodePointFilter(allowedRanges)))
{
}
@ -57,8 +57,7 @@ namespace Microsoft.Framework.WebEncoders
}
/// <summary>
/// A default instance of the HtmlEncoder, equivalent to allowing only
/// the 'Basic Latin' character range.
/// The default <see cref="HtmlEncoder"/>, which uses <see cref="UnicodeRanges.BasicLatin"/> as its allow list.
/// </summary>
public static HtmlEncoder Default
{
@ -120,7 +119,7 @@ namespace Microsoft.Framework.WebEncoders
HtmlUnicodeEncoder encoder = Volatile.Read(ref _basicLatinSingleton);
if (encoder == null)
{
encoder = new HtmlUnicodeEncoder(new CodePointFilter(UnicodeBlocks.BasicLatin));
encoder = new HtmlUnicodeEncoder(new CodePointFilter(UnicodeRanges.BasicLatin));
Volatile.Write(ref _basicLatinSingleton, encoder);
}
return encoder;

View File

@ -26,7 +26,7 @@ namespace Microsoft.Framework.WebEncoders
private readonly JavaScriptStringUnicodeEncoder _innerUnicodeEncoder;
/// <summary>
/// Instantiates an encoder using the 'Basic Latin' code table as the allow list.
/// Instantiates an encoder using <see cref="UnicodeRanges.BasicLatin"/> as its allow list.
/// </summary>
public JavaScriptStringEncoder()
: this(JavaScriptStringUnicodeEncoder.BasicLatin)
@ -34,11 +34,11 @@ namespace Microsoft.Framework.WebEncoders
}
/// <summary>
/// Instantiates an encoder specifying which Unicode character blocks are allowed to
/// Instantiates an encoder specifying which Unicode character ranges are allowed to
/// pass through the encoder unescaped.
/// </summary>
public JavaScriptStringEncoder(params UnicodeBlock[] allowedBlocks)
: this(new JavaScriptStringUnicodeEncoder(new CodePointFilter(allowedBlocks)))
public JavaScriptStringEncoder(params UnicodeRange[] allowedRanges)
: this(new JavaScriptStringUnicodeEncoder(new CodePointFilter(allowedRanges)))
{
}
@ -57,8 +57,7 @@ namespace Microsoft.Framework.WebEncoders
}
/// <summary>
/// A default instance of the JavaScriptStringEncoder, equivalent to allowing only
/// the 'Basic Latin' character range.
/// The default <see cref="JavaScriptStringEncoder"/>, which uses <see cref="UnicodeRanges.BasicLatin"/> as its allow list.
/// </summary>
public static JavaScriptStringEncoder Default
{
@ -124,7 +123,7 @@ namespace Microsoft.Framework.WebEncoders
JavaScriptStringUnicodeEncoder encoder = Volatile.Read(ref _basicLatinSingleton);
if (encoder == null)
{
encoder = new JavaScriptStringUnicodeEncoder(new CodePointFilter(UnicodeBlocks.BasicLatin));
encoder = new JavaScriptStringUnicodeEncoder(new CodePointFilter(UnicodeRanges.BasicLatin));
Volatile.Write(ref _basicLatinSingleton, encoder);
}
return encoder;

View File

@ -1,66 +0,0 @@
// Copyright (c) Microsoft Open Technologies, Inc. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
using System;
namespace Microsoft.Framework.WebEncoders
{
/// <summary>
/// Describes a block of Unicode code points by its first code point and its size.
/// </summary>
/// <remarks>
/// Currently only the Basic Multilingual Plane is supported.
/// </remarks>
public sealed class UnicodeBlock
{
    /// <summary>
    /// Creates a new Unicode block from its first code point and the number of
    /// code points it contains.
    /// </summary>
    /// <param name="firstCodePoint">The first code point in the block; must be in U+0000..U+FFFF and of the form U+nnn0.</param>
    /// <param name="blockSize">The number of code points in the block; must be a non-negative multiple of 16 that keeps the block inside the BMP.</param>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="firstCodePoint"/> or <paramref name="blockSize"/> violates the constraints above.
    /// </exception>
    public UnicodeBlock(int firstCodePoint, int blockSize)
    {
        // A block must start on a 16-code-point boundary inside the BMP.
        // See http://unicode.org/faq/blocks_ranges.html for more info.
        bool startInsideBmp = firstCodePoint >= 0 && firstCodePoint <= 0xFFFF;
        bool startIsAligned = (firstCodePoint & 0xF) == 0;
        if (!(startInsideBmp && startIsAligned))
        {
            throw new ArgumentOutOfRangeException(nameof(firstCodePoint));
        }

        // A block must contain a multiple of 16 code points and must not run past U+FFFF.
        // The end is computed as a long so that the addition cannot overflow.
        long endExclusive = (long)firstCodePoint + (long)blockSize;
        bool sizeIsValid = blockSize >= 0 && (blockSize & 0xF) == 0 && endExclusive <= 0x10000;
        if (!sizeIsValid)
        {
            throw new ArgumentOutOfRangeException(nameof(blockSize));
        }

        FirstCodePoint = firstCodePoint;
        BlockSize = blockSize;
    }

    /// <summary>
    /// The first code point in this block.
    /// </summary>
    public int FirstCodePoint { get; }

    /// <summary>
    /// The number of code points in this block.
    /// </summary>
    public int BlockSize { get; }

    /// <summary>
    /// Creates a Unicode block spanning <paramref name="firstChar"/> through
    /// <paramref name="lastChar"/>, inclusive.
    /// </summary>
    /// <param name="firstChar">The first character in the block; must be of the form U+nnn0.</param>
    /// <param name="lastChar">The last character in the block; must be of the form U+nnnF and not precede <paramref name="firstChar"/>.</param>
    /// <returns>The block covering the given characters.</returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="firstChar"/> or <paramref name="lastChar"/> violates the constraints above.
    /// </exception>
    public static UnicodeBlock FromCharacterRange(char firstChar, char lastChar)
    {
        // Planes cannot be spanned here because 'char' only holds BMP values,
        // so only the U+nnn0 / U+nnnF boundaries need checking.
        // See http://unicode.org/faq/blocks_ranges.html for more info.
        const int LowNibbleMask = 0xF;

        if ((firstChar & LowNibbleMask) != 0)
        {
            throw new ArgumentOutOfRangeException(nameof(firstChar));
        }

        bool endsOnBoundary = (lastChar & LowNibbleMask) == LowNibbleMask;
        if (firstChar > lastChar || !endsOnBoundary)
        {
            throw new ArgumentOutOfRangeException(nameof(lastChar));
        }

        // Both ends are inclusive, hence the +1.
        int size = (lastChar - firstChar) + 1;
        return new UnicodeBlock(firstChar, size);
    }
}
}

View File

@ -1,64 +0,0 @@
// Copyright (c) Microsoft Open Technologies, Inc. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
using System;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Threading;
namespace Microsoft.Framework.WebEncoders
{
/// <summary>
/// Exposes predefined <see cref="UnicodeBlock"/> instances that are created on first use.
/// </summary>
public static partial class UnicodeBlocks
{
    private static UnicodeBlock _none;
    private static UnicodeBlock _all;

    /// <summary>
    /// Represents an empty Unicode block.
    /// </summary>
    /// <remarks>
    /// This block contains no code points.
    /// </remarks>
    public static UnicodeBlock None
    {
        get
        {
            // Use the cached instance when one exists; otherwise create and publish it.
            UnicodeBlock cached = Volatile.Read(ref _none);
            if (cached == null)
            {
                cached = CreateEmptyBlock(ref _none);
            }
            return cached;
        }
    }

    /// <summary>
    /// Represents a block containing all characters in the Unicode Basic Multilingual Plane (U+0000..U+FFFF).
    /// </summary>
    public static UnicodeBlock All
    {
        get
        {
            // Use the cached instance when one exists; otherwise create and publish it.
            UnicodeBlock cached = Volatile.Read(ref _all);
            if (cached == null)
            {
                cached = CreateBlock(ref _all, '\u0000', '\uFFFF');
            }
            return cached;
        }
    }

    /// <summary>
    /// Creates a zero-length block, stores it in <paramref name="block"/> and returns it.
    /// </summary>
    [MethodImpl(MethodImplOptions.NoInlining)] // the caller should be inlined, not this method
    private static UnicodeBlock CreateEmptyBlock(ref UnicodeBlock block)
    {
        // Racing threads may each create an instance; the last write wins, which is acceptable.
        UnicodeBlock created = new UnicodeBlock(0, 0);
        Volatile.Write(ref block, created);
        return created;
    }

    /// <summary>
    /// Creates the block spanning <paramref name="first"/>..<paramref name="last"/>,
    /// stores it in <paramref name="block"/> and returns it.
    /// </summary>
    [MethodImpl(MethodImplOptions.NoInlining)] // the caller should be inlined, not this method
    private static UnicodeBlock CreateBlock(ref UnicodeBlock block, char first, char last)
    {
        // Racing threads may each create an instance; the last write wins, which is acceptable.
        Debug.Assert(last > first, "Code points were specified out of order.");
        UnicodeBlock created = UnicodeBlock.FromCharacterRange(first, last);
        Volatile.Write(ref block, created);
        return created;
    }
}
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,64 @@
// Copyright (c) Microsoft Open Technologies, Inc. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
using System;
namespace Microsoft.Framework.WebEncoders
{
/// <summary>
/// Describes a contiguous run of Unicode code points by its first code point and its length.
/// </summary>
/// <remarks>
/// Currently only the Basic Multilingual Plane is supported.
/// </remarks>
public sealed class UnicodeRange
{
    /// <summary>
    /// Creates a new <see cref="UnicodeRange"/>.
    /// </summary>
    /// <param name="firstCodePoint">The first code point in the range; must be in U+0000..U+FFFF.</param>
    /// <param name="rangeSize">The number of code points in the range; must be non-negative and keep the range inside the BMP.</param>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="firstCodePoint"/> or <paramref name="rangeSize"/> violates the constraints above.
    /// </exception>
    public UnicodeRange(int firstCodePoint, int rangeSize)
    {
        // The whole range must lie within the BMP.
        // See http://unicode.org/faq/blocks_ranges.html for more info.
        bool startInsideBmp = firstCodePoint >= 0 && firstCodePoint <= 0xFFFF;
        if (!startInsideBmp)
        {
            throw new ArgumentOutOfRangeException(nameof(firstCodePoint));
        }

        // The end is computed as a long so that the addition cannot overflow.
        long endExclusive = (long)firstCodePoint + (long)rangeSize;
        bool sizeIsValid = rangeSize >= 0 && endExclusive <= 0x10000;
        if (!sizeIsValid)
        {
            throw new ArgumentOutOfRangeException(nameof(rangeSize));
        }

        FirstCodePoint = firstCodePoint;
        RangeSize = rangeSize;
    }

    /// <summary>
    /// The first code point in this range.
    /// </summary>
    public int FirstCodePoint { get; }

    /// <summary>
    /// The number of code points in this range.
    /// </summary>
    public int RangeSize { get; }

    /// <summary>
    /// Creates a new <see cref="UnicodeRange"/> covering <paramref name="firstChar"/> through
    /// <paramref name="lastChar"/>, inclusive.
    /// </summary>
    /// <param name="firstChar">The first character in the range.</param>
    /// <param name="lastChar">The last character in the range; must not precede <paramref name="firstChar"/>.</param>
    /// <returns>The <see cref="UnicodeRange"/> representing this span.</returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="lastChar"/> precedes <paramref name="firstChar"/>.
    /// </exception>
    public static UnicodeRange FromSpan(char firstChar, char lastChar)
    {
        if (firstChar > lastChar)
        {
            throw new ArgumentOutOfRangeException(nameof(lastChar));
        }

        // Both ends are inclusive, hence the +1.
        int size = (lastChar - firstChar) + 1;
        return new UnicodeRange(firstChar, size);
    }
}
}

View File

@ -0,0 +1,51 @@
// Copyright (c) Microsoft Open Technologies, Inc. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
using System;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Threading;
namespace Microsoft.Framework.WebEncoders
{
/// <summary>
/// Contains predefined <see cref="UnicodeRange"/> instances which correspond to blocks
/// from the Unicode 7.0 specification.
/// </summary>
public static partial class UnicodeRanges
{
    private static UnicodeRange _none;
    private static UnicodeRange _all;

    /// <summary>
    /// An empty <see cref="UnicodeRange"/>. This range contains no code points.
    /// </summary>
    public static UnicodeRange None
    {
        get
        {
            // Use the cached instance when one exists; otherwise create and publish it.
            UnicodeRange cached = Volatile.Read(ref _none);
            if (cached == null)
            {
                cached = CreateEmptyRange(ref _none);
            }
            return cached;
        }
    }

    /// <summary>
    /// A <see cref="UnicodeRange"/> which contains all characters in the Unicode Basic
    /// Multilingual Plane (U+0000..U+FFFF).
    /// </summary>
    public static UnicodeRange All
    {
        get
        {
            // Use the cached instance when one exists; otherwise create and publish it.
            UnicodeRange cached = Volatile.Read(ref _all);
            if (cached == null)
            {
                cached = CreateRange(ref _all, '\u0000', '\uFFFF');
            }
            return cached;
        }
    }

    /// <summary>
    /// Creates a zero-length range, stores it in <paramref name="range"/> and returns it.
    /// </summary>
    [MethodImpl(MethodImplOptions.NoInlining)] // the caller should be inlined, not this method
    private static UnicodeRange CreateEmptyRange(ref UnicodeRange range)
    {
        // Racing threads may each create an instance; the last write wins, which is acceptable.
        UnicodeRange created = new UnicodeRange(0, 0);
        Volatile.Write(ref range, created);
        return created;
    }

    /// <summary>
    /// Creates the range spanning <paramref name="first"/>..<paramref name="last"/>,
    /// stores it in <paramref name="range"/> and returns it.
    /// </summary>
    [MethodImpl(MethodImplOptions.NoInlining)] // the caller should be inlined, not this method
    private static UnicodeRange CreateRange(ref UnicodeRange range, char first, char last)
    {
        // Racing threads may each create an instance; the last write wins, which is acceptable.
        Debug.Assert(last > first, "Code points were specified out of order.");
        UnicodeRange created = UnicodeRange.FromSpan(first, last);
        Volatile.Write(ref range, created);
        return created;
    }
}
}

File diff suppressed because it is too large Load Diff

View File

@ -26,7 +26,7 @@ namespace Microsoft.Framework.WebEncoders
private readonly UrlUnicodeEncoder _innerUnicodeEncoder;
/// <summary>
/// Instantiates an encoder using the 'Basic Latin' code table as the allow list.
/// Instantiates an encoder using <see cref="UnicodeRanges.BasicLatin"/> as its allow list.
/// </summary>
public UrlEncoder()
: this(UrlUnicodeEncoder.BasicLatin)
@ -34,11 +34,11 @@ namespace Microsoft.Framework.WebEncoders
}
/// <summary>
/// Instantiates an encoder specifying which Unicode character blocks are allowed to
/// Instantiates an encoder specifying which Unicode character ranges are allowed to
/// pass through the encoder unescaped.
/// </summary>
public UrlEncoder(params UnicodeBlock[] allowedBlocks)
: this(new UrlUnicodeEncoder(new CodePointFilter(allowedBlocks)))
public UrlEncoder(params UnicodeRange[] allowedRanges)
: this(new UrlUnicodeEncoder(new CodePointFilter(allowedRanges)))
{
}
@ -57,8 +57,7 @@ namespace Microsoft.Framework.WebEncoders
}
/// <summary>
/// A default instance of the UrlEncoder, equivalent to allowing only
/// the 'Basic Latin' character range.
/// The default <see cref="UrlEncoder"/>, which uses <see cref="UnicodeRanges.BasicLatin"/> as its allow list.
/// </summary>
public static UrlEncoder Default
{
@ -133,8 +132,8 @@ namespace Microsoft.Framework.WebEncoders
// sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
// / "*" / "+" / "," / ";" / "="
//
// From this list, the base encoder blocks "&", "'", "+",
// and we'll additionally block "=" since it has special meaning
// From this list, the base encoder forbids "&", "'", "+",
// and we'll additionally forbid "=" since it has special meaning
// in x-www-form-urlencoded representations.
//
// This means that the full list of allowed characters from the
@ -163,7 +162,7 @@ namespace Microsoft.Framework.WebEncoders
UrlUnicodeEncoder encoder = Volatile.Read(ref _basicLatinSingleton);
if (encoder == null)
{
encoder = new UrlUnicodeEncoder(new CodePointFilter(UnicodeBlocks.BasicLatin));
encoder = new UrlUnicodeEncoder(new CodePointFilter(UnicodeRanges.BasicLatin));
Volatile.Write(ref _basicLatinSingleton, encoder);
}
return encoder;

View File

@ -43,7 +43,7 @@ namespace Microsoft.Framework.WebEncoders
public void Ctor_OtherCodePointFilterAsConcreteType_Clones()
{
// Arrange
var originalFilter = new CodePointFilter(UnicodeBlocks.None).AllowChar('x');
var originalFilter = new CodePointFilter().AllowChar('x');
// Act
var newFilter = new CodePointFilter(originalFilter).AllowChar('y');
@ -56,10 +56,10 @@ namespace Microsoft.Framework.WebEncoders
}
[Fact]
public void Ctor_UnicodeBlocks()
public void Ctor_UnicodeRanges()
{
// Act
var filter = new CodePointFilter(UnicodeBlocks.LatinExtendedA, UnicodeBlocks.LatinExtendedC);
var filter = new CodePointFilter(UnicodeRanges.LatinExtendedA, UnicodeRanges.LatinExtendedC);
// Assert
for (int i = 0; i < 0x0100; i++)
@ -84,64 +84,6 @@ namespace Microsoft.Framework.WebEncoders
}
}
[Fact]
public void AllowBlock()
{
    // Arrange: start from a filter built from the empty block.
    var filter = new CodePointFilter(UnicodeBlocks.None);

    // Act
    var retVal = filter.AllowBlock(UnicodeBlocks.LatinExtendedA);

    // Assert
    Assert.Same(filter, retVal); // returns 'this' instance

    // Only U+0100..U+017F may be allowed; every other BMP code point must be rejected.
    for (int codePoint = 0; codePoint <= Char.MaxValue; codePoint++)
    {
        bool isAllowed = filter.IsCharacterAllowed((char)codePoint);
        if (codePoint >= 0x0100 && codePoint <= 0x017F)
        {
            Assert.True(isAllowed);
        }
        else
        {
            Assert.False(isAllowed);
        }
    }
}
[Fact]
public void AllowBlocks()
{
    // Arrange: start from a filter built from the empty block.
    var filter = new CodePointFilter(UnicodeBlocks.None);

    // Act
    var retVal = filter.AllowBlocks(UnicodeBlocks.LatinExtendedA, UnicodeBlocks.LatinExtendedC);

    // Assert
    Assert.Same(filter, retVal); // returns 'this' instance

    // Consecutive boundaries delimit segments that alternate between
    // "rejected" and "allowed", starting with "rejected" at U+0000.
    int[] boundaries = { 0x0000, 0x0100, 0x0180, 0x2C60, 0x2C80, Char.MaxValue + 1 };
    bool expectAllowed = false;
    for (int segment = 0; segment + 1 < boundaries.Length; segment++)
    {
        for (int codePoint = boundaries[segment]; codePoint < boundaries[segment + 1]; codePoint++)
        {
            bool isAllowed = filter.IsCharacterAllowed((char)codePoint);
            if (expectAllowed)
            {
                Assert.True(isAllowed);
            }
            else
            {
                Assert.False(isAllowed);
            }
        }
        expectAllowed = !expectAllowed;
    }
}
[Fact]
public void AllowChar()
{
@ -195,7 +137,7 @@ namespace Microsoft.Framework.WebEncoders
public void AllowFilter()
{
// Arrange
var filter = new CodePointFilter(UnicodeBlocks.BasicLatin);
var filter = new CodePointFilter(UnicodeRanges.BasicLatin);
// Act
var retVal = filter.AllowFilter(new OddCodePointFilter());
@ -212,6 +154,64 @@ namespace Microsoft.Framework.WebEncoders
}
}
[Fact]
public void AllowRange()
{
    // Arrange: a freshly constructed filter.
    var filter = new CodePointFilter();

    // Act
    var retVal = filter.AllowRange(UnicodeRanges.LatinExtendedA);

    // Assert
    Assert.Same(filter, retVal); // returns 'this' instance

    // Consecutive boundaries delimit segments that alternate between
    // "rejected" and "allowed", starting with "rejected" at U+0000.
    int[] boundaries = { 0x0000, 0x0100, 0x0180, Char.MaxValue + 1 };
    bool expectAllowed = false;
    for (int segment = 0; segment + 1 < boundaries.Length; segment++)
    {
        for (int codePoint = boundaries[segment]; codePoint < boundaries[segment + 1]; codePoint++)
        {
            bool isAllowed = filter.IsCharacterAllowed((char)codePoint);
            if (expectAllowed)
            {
                Assert.True(isAllowed);
            }
            else
            {
                Assert.False(isAllowed);
            }
        }
        expectAllowed = !expectAllowed;
    }
}
[Fact]
public void AllowRanges()
{
    // Arrange: a freshly constructed filter.
    var filter = new CodePointFilter();

    // Act
    var retVal = filter.AllowRanges(UnicodeRanges.LatinExtendedA, UnicodeRanges.LatinExtendedC);

    // Assert
    Assert.Same(filter, retVal); // returns 'this' instance

    // Only U+0100..U+017F and U+2C60..U+2C7F may be allowed;
    // every other BMP code point must be rejected.
    for (int codePoint = 0; codePoint <= Char.MaxValue; codePoint++)
    {
        bool inFirstRange = codePoint >= 0x0100 && codePoint <= 0x017F;
        bool inSecondRange = codePoint >= 0x2C60 && codePoint <= 0x2C7F;
        bool isAllowed = filter.IsCharacterAllowed((char)codePoint);
        if (inFirstRange || inSecondRange)
        {
            Assert.True(isAllowed);
        }
        else
        {
            Assert.False(isAllowed);
        }
    }
}
[Fact]
public void Clear()
{
@ -234,13 +234,64 @@ namespace Microsoft.Framework.WebEncoders
}
[Fact]
public void ForbidBlock()
public void ForbidChar()
{
    // Arrange
    var filter = new CodePointFilter(UnicodeRanges.BasicLatin);

    // Act
    var retVal = filter.ForbidChar('x');

    // Assert
    Assert.Same(filter, retVal); // returns 'this' instance

    // Of the probed characters, only 'x' must now be rejected.
    foreach (char candidate in "wxyz")
    {
        bool isAllowed = filter.IsCharacterAllowed(candidate);
        if (candidate == 'x')
        {
            Assert.False(isAllowed);
        }
        else
        {
            Assert.True(isAllowed);
        }
    }
}
[Fact]
public void ForbidChars_Array()
{
    // Arrange
    var filter = new CodePointFilter(UnicodeRanges.BasicLatin);

    // Act
    var retVal = filter.ForbidChars('x', 'z');

    // Assert
    Assert.Same(filter, retVal); // returns 'this' instance

    // Of the probed characters, only 'x' and 'z' must now be rejected.
    foreach (char candidate in "wxyz")
    {
        bool isAllowed = filter.IsCharacterAllowed(candidate);
        if (candidate == 'x' || candidate == 'z')
        {
            Assert.False(isAllowed);
        }
        else
        {
            Assert.True(isAllowed);
        }
    }
}
[Fact]
public void ForbidChars_String()
{
    // Arrange
    var filter = new CodePointFilter(UnicodeRanges.BasicLatin);

    // Act
    var retVal = filter.ForbidChars("xz");

    // Assert
    Assert.Same(filter, retVal); // returns 'this' instance

    // Of the probed characters, only 'x' and 'z' must now be rejected.
    foreach (char candidate in "wxyz")
    {
        bool isAllowed = filter.IsCharacterAllowed(candidate);
        if (candidate == 'x' || candidate == 'z')
        {
            Assert.False(isAllowed);
        }
        else
        {
            Assert.True(isAllowed);
        }
    }
}
[Fact]
public void ForbidRange()
{
// Arrange
var filter = new CodePointFilter(new OddCodePointFilter());
// Act
var retVal = filter.ForbidBlock(UnicodeBlocks.Specials);
var retVal = filter.ForbidRange(UnicodeRanges.Specials);
// Assert
Assert.Same(filter, retVal); // returns 'this' instance
@ -255,13 +306,13 @@ namespace Microsoft.Framework.WebEncoders
}
[Fact]
public void ForbidBlocks()
public void ForbidRanges()
{
// Arrange
var filter = new CodePointFilter(new OddCodePointFilter());
// Act
var retVal = filter.ForbidBlocks(UnicodeBlocks.BasicLatin, UnicodeBlocks.Specials);
var retVal = filter.ForbidRanges(UnicodeRanges.BasicLatin, UnicodeRanges.Specials);
// Assert
Assert.Same(filter, retVal); // returns 'this' instance
@ -279,68 +330,17 @@ namespace Microsoft.Framework.WebEncoders
}
}
[Fact]
public void ForbidChar()
{
    // Arrange
    var filter = new CodePointFilter(UnicodeBlocks.BasicLatin);

    // Act
    var retVal = filter.ForbidChar('x');

    // Assert
    Assert.Same(filter, retVal); // returns 'this' instance

    // Probe the characters in order; those listed as forbidden must be rejected,
    // and the others must still be allowed.
    const string probes = "wxyz";
    const string forbidden = "x";
    for (int index = 0; index < probes.Length; index++)
    {
        char probe = probes[index];
        bool isAllowed = filter.IsCharacterAllowed(probe);
        if (forbidden.IndexOf(probe) >= 0)
        {
            Assert.False(isAllowed);
        }
        else
        {
            Assert.True(isAllowed);
        }
    }
}
[Fact]
public void ForbidChars_Array()
{
    // Arrange
    var filter = new CodePointFilter(UnicodeBlocks.BasicLatin);

    // Act
    var retVal = filter.ForbidChars('x', 'z');

    // Assert
    Assert.Same(filter, retVal); // returns 'this' instance

    // Probe the characters in order; those listed as forbidden must be rejected,
    // and the others must still be allowed.
    const string probes = "wxyz";
    const string forbidden = "xz";
    for (int index = 0; index < probes.Length; index++)
    {
        char probe = probes[index];
        bool isAllowed = filter.IsCharacterAllowed(probe);
        if (forbidden.IndexOf(probe) >= 0)
        {
            Assert.False(isAllowed);
        }
        else
        {
            Assert.True(isAllowed);
        }
    }
}
[Fact]
public void ForbidChars_String()
{
    // Arrange
    var filter = new CodePointFilter(UnicodeBlocks.BasicLatin);

    // Act
    var retVal = filter.ForbidChars("xz");

    // Assert
    Assert.Same(filter, retVal); // returns 'this' instance

    // Probe the characters in order; those listed as forbidden must be rejected,
    // and the others must still be allowed.
    const string probes = "wxyz";
    const string forbidden = "xz";
    for (int index = 0; index < probes.Length; index++)
    {
        char probe = probes[index];
        bool isAllowed = filter.IsCharacterAllowed(probe);
        if (forbidden.IndexOf(probe) >= 0)
        {
            Assert.False(isAllowed);
        }
        else
        {
            Assert.True(isAllowed);
        }
    }
}
[Fact]
public void GetAllowedCodePoints()
{
// Arrange
var expected = Enumerable.Range(UnicodeBlocks.BasicLatin.FirstCodePoint, UnicodeBlocks.BasicLatin.BlockSize)
.Concat(Enumerable.Range(UnicodeBlocks.Specials.FirstCodePoint, UnicodeBlocks.Specials.BlockSize))
var expected = Enumerable.Range(UnicodeRanges.BasicLatin.FirstCodePoint, UnicodeRanges.BasicLatin.RangeSize)
.Concat(Enumerable.Range(UnicodeRanges.Specials.FirstCodePoint, UnicodeRanges.Specials.RangeSize))
.Except(new int[] { 'x' })
.OrderBy(i => i)
.ToArray();
var filter = new CodePointFilter(UnicodeBlocks.BasicLatin, UnicodeBlocks.Specials);
var filter = new CodePointFilter(UnicodeRanges.BasicLatin, UnicodeRanges.Specials);
filter.ForbidChar('x');
// Act

View File

@ -19,7 +19,7 @@ namespace Microsoft.Framework.WebEncoders
public void HtmlEncode_PositiveTestCase()
{
// Arrange
IHtmlEncoder encoder = new HtmlEncoder(UnicodeBlocks.All);
IHtmlEncoder encoder = new HtmlEncoder(UnicodeRanges.All);
StringWriter writer = new StringWriter();
// Act
@ -39,7 +39,7 @@ namespace Microsoft.Framework.WebEncoders
public void JavaScriptStringEncode_PositiveTestCase()
{
// Arrange
IJavaScriptStringEncoder encoder = new JavaScriptStringEncoder(UnicodeBlocks.All);
IJavaScriptStringEncoder encoder = new JavaScriptStringEncoder(UnicodeRanges.All);
StringWriter writer = new StringWriter();
// Act
@ -59,7 +59,7 @@ namespace Microsoft.Framework.WebEncoders
public void UrlEncode_PositiveTestCase()
{
// Arrange
IUrlEncoder encoder = new UrlEncoder(UnicodeBlocks.All);
IUrlEncoder encoder = new UrlEncoder(UnicodeRanges.All);
StringWriter writer = new StringWriter();
// Act

View File

@ -14,7 +14,7 @@ namespace Microsoft.Framework.WebEncoders
public void Ctor_WithCodePointFilter()
{
// Arrange
var filter = new CodePointFilter(UnicodeBlocks.None).AllowChars("ab").AllowChars('\0', '&', '\uFFFF', 'd');
var filter = new CodePointFilter().AllowChars("ab").AllowChars('\0', '&', '\uFFFF', 'd');
HtmlEncoder encoder = new HtmlEncoder(filter);
// Act & assert
@ -28,10 +28,10 @@ namespace Microsoft.Framework.WebEncoders
}
[Fact]
public void Ctor_WithUnicodeBlocks()
public void Ctor_WithUnicodeRanges()
{
// Arrange
HtmlEncoder encoder = new HtmlEncoder(UnicodeBlocks.Latin1Supplement, UnicodeBlocks.MiscellaneousSymbols);
HtmlEncoder encoder = new HtmlEncoder(UnicodeRanges.Latin1Supplement, UnicodeRanges.MiscellaneousSymbols);
// Act & assert
Assert.Equal("&#x61;", encoder.HtmlEncode("a"));
@ -55,7 +55,7 @@ namespace Microsoft.Framework.WebEncoders
public void Default_EquivalentToBasicLatin()
{
// Arrange
HtmlEncoder controlEncoder = new HtmlEncoder(UnicodeBlocks.BasicLatin);
HtmlEncoder controlEncoder = new HtmlEncoder(UnicodeRanges.BasicLatin);
HtmlEncoder testEncoder = HtmlEncoder.Default;
// Act & assert
@ -90,7 +90,7 @@ namespace Microsoft.Framework.WebEncoders
public void HtmlEncode_AllRangesAllowed_StillEncodesForbiddenChars_Simple(string input, string expected)
{
// Arrange
HtmlEncoder encoder = new HtmlEncoder(UnicodeBlocks.All);
HtmlEncoder encoder = new HtmlEncoder(UnicodeRanges.All);
// Act
string retVal = encoder.HtmlEncode(input);
@ -103,7 +103,7 @@ namespace Microsoft.Framework.WebEncoders
public void HtmlEncode_AllRangesAllowed_StillEncodesForbiddenChars_Extended()
{
// Arrange
HtmlEncoder encoder = new HtmlEncoder(UnicodeBlocks.All);
HtmlEncoder encoder = new HtmlEncoder(UnicodeRanges.All);
// Act & assert - BMP chars
for (int i = 0; i <= 0xFFFF; i++)
@ -165,7 +165,7 @@ namespace Microsoft.Framework.WebEncoders
public void HtmlEncode_BadSurrogates_ReturnsUnicodeReplacementChar()
{
// Arrange
HtmlEncoder encoder = new HtmlEncoder(UnicodeBlocks.All); // allow all codepoints
HtmlEncoder encoder = new HtmlEncoder(UnicodeRanges.All); // allow all codepoints
// "a<unpaired leading>b<unpaired trailing>c<trailing before leading>d<unpaired trailing><valid>e<high at end of string>"
const string input = "a\uD800b\uDFFFc\uDFFF\uD800d\uDFFF\uD800\uDFFFe\uD800";

View File

@ -14,7 +14,7 @@ namespace Microsoft.Framework.WebEncoders
public void Ctor_WithCodePointFilter()
{
// Arrange
var filter = new CodePointFilter(UnicodeBlocks.None).AllowChars("ab").AllowChars('\0', '&', '\uFFFF', 'd');
var filter = new CodePointFilter().AllowChars("ab").AllowChars('\0', '&', '\uFFFF', 'd');
JavaScriptStringEncoder encoder = new JavaScriptStringEncoder(filter);
// Act & assert
@ -28,10 +28,10 @@ namespace Microsoft.Framework.WebEncoders
}
[Fact]
public void Ctor_WithUnicodeBlocks()
public void Ctor_WithUnicodeRanges()
{
// Arrange
JavaScriptStringEncoder encoder = new JavaScriptStringEncoder(UnicodeBlocks.Latin1Supplement, UnicodeBlocks.MiscellaneousSymbols);
JavaScriptStringEncoder encoder = new JavaScriptStringEncoder(UnicodeRanges.Latin1Supplement, UnicodeRanges.MiscellaneousSymbols);
// Act & assert
Assert.Equal(@"\u0061", encoder.JavaScriptStringEncode("a"));
@ -55,7 +55,7 @@ namespace Microsoft.Framework.WebEncoders
public void Default_EquivalentToBasicLatin()
{
// Arrange
JavaScriptStringEncoder controlEncoder = new JavaScriptStringEncoder(UnicodeBlocks.BasicLatin);
JavaScriptStringEncoder controlEncoder = new JavaScriptStringEncoder(UnicodeRanges.BasicLatin);
JavaScriptStringEncoder testEncoder = JavaScriptStringEncoder.Default;
// Act & assert
@ -97,7 +97,7 @@ namespace Microsoft.Framework.WebEncoders
public void JavaScriptStringEncode_AllRangesAllowed_StillEncodesForbiddenChars_Simple(string input, string expected)
{
// Arrange
JavaScriptStringEncoder encoder = new JavaScriptStringEncoder(UnicodeBlocks.All);
JavaScriptStringEncoder encoder = new JavaScriptStringEncoder(UnicodeRanges.All);
// Act
string retVal = encoder.JavaScriptStringEncode(input);
@ -110,7 +110,7 @@ namespace Microsoft.Framework.WebEncoders
public void JavaScriptStringEncode_AllRangesAllowed_StillEncodesForbiddenChars_Extended()
{
// Arrange
JavaScriptStringEncoder encoder = new JavaScriptStringEncoder(UnicodeBlocks.All);
JavaScriptStringEncoder encoder = new JavaScriptStringEncoder(UnicodeRanges.All);
// Act & assert - BMP chars
for (int i = 0; i <= 0xFFFF; i++)
@ -183,7 +183,7 @@ namespace Microsoft.Framework.WebEncoders
public void JavaScriptStringEncode_BadSurrogates_ReturnsUnicodeReplacementChar()
{
// Arrange
JavaScriptStringEncoder encoder = new JavaScriptStringEncoder(UnicodeBlocks.All); // allow all codepoints
JavaScriptStringEncoder encoder = new JavaScriptStringEncoder(UnicodeRanges.All); // allow all codepoints
// "a<unpaired leading>b<unpaired trailing>c<trailing before leading>d<unpaired trailing><valid>e<high at end of string>"
const string input = "a\uD800b\uDFFFc\uDFFF\uD800d\uDFFF\uD800\uDFFFe\uD800";
@ -290,7 +290,7 @@ namespace Microsoft.Framework.WebEncoders
// \u-escape these characters instead of using \' and \".
// Arrange
JavaScriptStringEncoder encoder = new JavaScriptStringEncoder(UnicodeBlocks.All);
JavaScriptStringEncoder encoder = new JavaScriptStringEncoder(UnicodeRanges.All);
// Act
string retVal = encoder.JavaScriptStringEncode(input);
@ -306,8 +306,8 @@ namespace Microsoft.Framework.WebEncoders
// by never emitting HTML-sensitive characters unescaped.
// Arrange
JavaScriptStringEncoder javaScriptStringEncoder = new JavaScriptStringEncoder(UnicodeBlocks.All);
HtmlEncoder htmlEncoder = new HtmlEncoder(UnicodeBlocks.All);
JavaScriptStringEncoder javaScriptStringEncoder = new JavaScriptStringEncoder(UnicodeRanges.All);
HtmlEncoder htmlEncoder = new HtmlEncoder(UnicodeRanges.All);
// Act & assert
for (int i = 0; i <= 0x10FFFF; i++)

View File

@ -1,86 +0,0 @@
// Copyright (c) Microsoft Open Technologies, Inc. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
using System;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Text;
using Xunit;
namespace Microsoft.Framework.WebEncoders
{
/// <summary>
/// Tests for <see cref="UnicodeBlock"/>: the two-argument constructor and the
/// <c>FromCharacterRange</c> factory, covering both rejected and accepted inputs.
/// </summary>
public class UnicodeBlockTests
{
    /// <summary>
    /// Each input pair must make the constructor throw an
    /// <see cref="ArgumentOutOfRangeException"/> whose <c>ParamName</c> is "firstCodePoint".
    /// </summary>
    [Theory]
    [InlineData(-1, 16)]
    [InlineData(1, 16)]
    [InlineData(0x10000, 16)]
    public void Ctor_FailureCase_FirstCodePoint(int firstCodePoint, int blockSize)
    {
        ArgumentOutOfRangeException thrown = Assert.Throws<ArgumentOutOfRangeException>(
            () => new UnicodeBlock(firstCodePoint, blockSize));

        Assert.Equal("firstCodePoint", thrown.ParamName);
    }

    /// <summary>
    /// Each input pair must make the constructor throw an
    /// <see cref="ArgumentOutOfRangeException"/> whose <c>ParamName</c> is "blockSize".
    /// </summary>
    [Theory]
    [InlineData(0x0100, -1)]
    [InlineData(0x0100, 15)]
    [InlineData(0x0100, 0x10000)]
    public void Ctor_FailureCase_BlockSize(int firstCodePoint, int blockSize)
    {
        ArgumentOutOfRangeException thrown = Assert.Throws<ArgumentOutOfRangeException>(
            () => new UnicodeBlock(firstCodePoint, blockSize));

        Assert.Equal("blockSize", thrown.ParamName);
    }

    /// <summary>
    /// A block built from a start of U+0100 and a size of 128 reports those same values back.
    /// </summary>
    [Fact]
    public void Ctor_SuccessCase()
    {
        // Arrange - Latin Extended-A
        const int expectedFirstCodePoint = 0x0100;
        const int expectedBlockSize = 128;

        // Act
        UnicodeBlock latinExtendedA = new UnicodeBlock(expectedFirstCodePoint, expectedBlockSize);

        // Assert
        Assert.Equal(expectedFirstCodePoint, latinExtendedA.FirstCodePoint);
        Assert.Equal(expectedBlockSize, latinExtendedA.BlockSize);
    }

    /// <summary>
    /// The factory must throw an <see cref="ArgumentOutOfRangeException"/> whose
    /// <c>ParamName</c> is "firstChar" for the supplied character pair.
    /// </summary>
    [Theory]
    [InlineData('\u0001', '\u0002')]
    public void FromCharacterRange_FailureCases_FirstChar(char firstChar, char lastChar)
    {
        ArgumentOutOfRangeException thrown = Assert.Throws<ArgumentOutOfRangeException>(
            () => UnicodeBlock.FromCharacterRange(firstChar, lastChar));

        Assert.Equal("firstChar", thrown.ParamName);
    }

    /// <summary>
    /// The factory must throw an <see cref="ArgumentOutOfRangeException"/> whose
    /// <c>ParamName</c> is "lastChar" for each supplied character pair.
    /// </summary>
    [Theory]
    [InlineData('\u0100', '\u007F')]
    [InlineData('\u0100', '\u0100')]
    [InlineData('\u0100', '\u010E')]
    public void FromCharacterRange_FailureCases_LastChar(char firstChar, char lastChar)
    {
        ArgumentOutOfRangeException thrown = Assert.Throws<ArgumentOutOfRangeException>(
            () => UnicodeBlock.FromCharacterRange(firstChar, lastChar));

        Assert.Equal("lastChar", thrown.ParamName);
    }

    /// <summary>
    /// The span U+0180..U+024F yields a block starting at 0x0180 with a size of 208.
    /// </summary>
    [Fact]
    public void FromCharacterRange_SuccessCase()
    {
        // Act - Latin Extended-B
        UnicodeBlock latinExtendedB = UnicodeBlock.FromCharacterRange('\u0180', '\u024F');

        // Assert
        Assert.Equal(0x0180, latinExtendedB.FirstCodePoint);
        Assert.Equal(208, latinExtendedB.BlockSize);
    }

    /// <summary>
    /// The span U+0000..U+FFFF yields a block starting at 0 with a size of 0x10000.
    /// </summary>
    [Fact]
    public void FromCharacterRange_SuccessCase_All()
    {
        // Act
        UnicodeBlock everything = UnicodeBlock.FromCharacterRange('\u0000', '\uFFFF');

        // Assert
        Assert.Equal(0, everything.FirstCodePoint);
        Assert.Equal(0x10000, everything.BlockSize);
    }
}
}

View File

@ -1,210 +0,0 @@
// Copyright (c) Microsoft Open Technologies, Inc. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
using System;
using System.Reflection;
using Xunit;
namespace Microsoft.Framework.WebEncoders
{
/// <summary>
/// Tests for the static properties exposed by <see cref="UnicodeBlocks"/>: each one must
/// cover the expected code point span and hand back the same instance on every read.
/// </summary>
public class UnicodeBlocksTests
{
    /// <summary>
    /// <c>UnicodeBlocks.None</c> is non-null, starts at 0, has size 0, and is the same
    /// instance on repeated reads.
    /// </summary>
    [Fact]
    public void Block_None()
    {
        var firstRead = UnicodeBlocks.None;
        Assert.NotNull(firstRead);

        // Check 1: nothing is covered by this block
        Assert.Equal(0, firstRead.FirstCodePoint);
        Assert.Equal(0, firstRead.BlockSize);

        // Check 2: a second read of the property must yield the very same object
        var secondRead = UnicodeBlocks.None;
        Assert.Same(firstRead, secondRead);
    }

    /// <summary>
    /// <c>UnicodeBlocks.All</c> must span U+0000..U+FFFF; verified through <see cref="Block_Unicode"/>.
    /// </summary>
    [Fact]
    public void Block_All()
    {
        const char firstChar = '\u0000';
        const char lastChar = '\uFFFF';

        Block_Unicode(firstChar, lastChar, nameof(UnicodeBlocks.All));
    }

    /// <summary>
    /// Looks up the public static property called <paramref name="blockName"/> on
    /// <see cref="UnicodeBlocks"/> via reflection and checks that the block it returns spans
    /// <paramref name="first"/>..<paramref name="last"/> and is the same instance on a second read.
    /// </summary>
    /// <param name="first">Expected first character of the block.</param>
    /// <param name="last">Expected last character of the block.</param>
    /// <param name="blockName">Name of the property on <see cref="UnicodeBlocks"/> to inspect.</param>
    [Theory]
    [InlineData('\u0000', '\u007F', nameof(UnicodeBlocks.BasicLatin))]
    [InlineData('\u0080', '\u00FF', nameof(UnicodeBlocks.Latin1Supplement))]
    [InlineData('\u0100', '\u017F', nameof(UnicodeBlocks.LatinExtendedA))]
    [InlineData('\u0180', '\u024F', nameof(UnicodeBlocks.LatinExtendedB))]
    [InlineData('\u0250', '\u02AF', nameof(UnicodeBlocks.IPAExtensions))]
    [InlineData('\u02B0', '\u02FF', nameof(UnicodeBlocks.SpacingModifierLetters))]
    [InlineData('\u0300', '\u036F', nameof(UnicodeBlocks.CombiningDiacriticalMarks))]
    [InlineData('\u0370', '\u03FF', nameof(UnicodeBlocks.GreekandCoptic))]
    [InlineData('\u0400', '\u04FF', nameof(UnicodeBlocks.Cyrillic))]
    [InlineData('\u0500', '\u052F', nameof(UnicodeBlocks.CyrillicSupplement))]
    [InlineData('\u0530', '\u058F', nameof(UnicodeBlocks.Armenian))]
    [InlineData('\u0590', '\u05FF', nameof(UnicodeBlocks.Hebrew))]
    [InlineData('\u0600', '\u06FF', nameof(UnicodeBlocks.Arabic))]
    [InlineData('\u0700', '\u074F', nameof(UnicodeBlocks.Syriac))]
    [InlineData('\u0750', '\u077F', nameof(UnicodeBlocks.ArabicSupplement))]
    [InlineData('\u0780', '\u07BF', nameof(UnicodeBlocks.Thaana))]
    [InlineData('\u07C0', '\u07FF', nameof(UnicodeBlocks.NKo))]
    [InlineData('\u0800', '\u083F', nameof(UnicodeBlocks.Samaritan))]
    [InlineData('\u0840', '\u085F', nameof(UnicodeBlocks.Mandaic))]
    [InlineData('\u08A0', '\u08FF', nameof(UnicodeBlocks.ArabicExtendedA))]
    [InlineData('\u0900', '\u097F', nameof(UnicodeBlocks.Devanagari))]
    [InlineData('\u0980', '\u09FF', nameof(UnicodeBlocks.Bengali))]
    [InlineData('\u0A00', '\u0A7F', nameof(UnicodeBlocks.Gurmukhi))]
    [InlineData('\u0A80', '\u0AFF', nameof(UnicodeBlocks.Gujarati))]
    [InlineData('\u0B00', '\u0B7F', nameof(UnicodeBlocks.Oriya))]
    [InlineData('\u0B80', '\u0BFF', nameof(UnicodeBlocks.Tamil))]
    [InlineData('\u0C00', '\u0C7F', nameof(UnicodeBlocks.Telugu))]
    [InlineData('\u0C80', '\u0CFF', nameof(UnicodeBlocks.Kannada))]
    [InlineData('\u0D00', '\u0D7F', nameof(UnicodeBlocks.Malayalam))]
    [InlineData('\u0D80', '\u0DFF', nameof(UnicodeBlocks.Sinhala))]
    [InlineData('\u0E00', '\u0E7F', nameof(UnicodeBlocks.Thai))]
    [InlineData('\u0E80', '\u0EFF', nameof(UnicodeBlocks.Lao))]
    [InlineData('\u0F00', '\u0FFF', nameof(UnicodeBlocks.Tibetan))]
    [InlineData('\u1000', '\u109F', nameof(UnicodeBlocks.Myanmar))]
    [InlineData('\u10A0', '\u10FF', nameof(UnicodeBlocks.Georgian))]
    [InlineData('\u1100', '\u11FF', nameof(UnicodeBlocks.HangulJamo))]
    [InlineData('\u1200', '\u137F', nameof(UnicodeBlocks.Ethiopic))]
    [InlineData('\u1380', '\u139F', nameof(UnicodeBlocks.EthiopicSupplement))]
    [InlineData('\u13A0', '\u13FF', nameof(UnicodeBlocks.Cherokee))]
    [InlineData('\u1400', '\u167F', nameof(UnicodeBlocks.UnifiedCanadianAboriginalSyllabics))]
    [InlineData('\u1680', '\u169F', nameof(UnicodeBlocks.Ogham))]
    [InlineData('\u16A0', '\u16FF', nameof(UnicodeBlocks.Runic))]
    [InlineData('\u1700', '\u171F', nameof(UnicodeBlocks.Tagalog))]
    [InlineData('\u1720', '\u173F', nameof(UnicodeBlocks.Hanunoo))]
    [InlineData('\u1740', '\u175F', nameof(UnicodeBlocks.Buhid))]
    [InlineData('\u1760', '\u177F', nameof(UnicodeBlocks.Tagbanwa))]
    [InlineData('\u1780', '\u17FF', nameof(UnicodeBlocks.Khmer))]
    [InlineData('\u1800', '\u18AF', nameof(UnicodeBlocks.Mongolian))]
    [InlineData('\u18B0', '\u18FF', nameof(UnicodeBlocks.UnifiedCanadianAboriginalSyllabicsExtended))]
    [InlineData('\u1900', '\u194F', nameof(UnicodeBlocks.Limbu))]
    [InlineData('\u1950', '\u197F', nameof(UnicodeBlocks.TaiLe))]
    [InlineData('\u1980', '\u19DF', nameof(UnicodeBlocks.NewTaiLue))]
    [InlineData('\u19E0', '\u19FF', nameof(UnicodeBlocks.KhmerSymbols))]
    [InlineData('\u1A00', '\u1A1F', nameof(UnicodeBlocks.Buginese))]
    [InlineData('\u1A20', '\u1AAF', nameof(UnicodeBlocks.TaiTham))]
    [InlineData('\u1AB0', '\u1AFF', nameof(UnicodeBlocks.CombiningDiacriticalMarksExtended))]
    [InlineData('\u1B00', '\u1B7F', nameof(UnicodeBlocks.Balinese))]
    [InlineData('\u1B80', '\u1BBF', nameof(UnicodeBlocks.Sundanese))]
    [InlineData('\u1BC0', '\u1BFF', nameof(UnicodeBlocks.Batak))]
    [InlineData('\u1C00', '\u1C4F', nameof(UnicodeBlocks.Lepcha))]
    [InlineData('\u1C50', '\u1C7F', nameof(UnicodeBlocks.OlChiki))]
    [InlineData('\u1CC0', '\u1CCF', nameof(UnicodeBlocks.SundaneseSupplement))]
    [InlineData('\u1CD0', '\u1CFF', nameof(UnicodeBlocks.VedicExtensions))]
    [InlineData('\u1D00', '\u1D7F', nameof(UnicodeBlocks.PhoneticExtensions))]
    [InlineData('\u1D80', '\u1DBF', nameof(UnicodeBlocks.PhoneticExtensionsSupplement))]
    [InlineData('\u1DC0', '\u1DFF', nameof(UnicodeBlocks.CombiningDiacriticalMarksSupplement))]
    [InlineData('\u1E00', '\u1EFF', nameof(UnicodeBlocks.LatinExtendedAdditional))]
    [InlineData('\u1F00', '\u1FFF', nameof(UnicodeBlocks.GreekExtended))]
    [InlineData('\u2000', '\u206F', nameof(UnicodeBlocks.GeneralPunctuation))]
    [InlineData('\u2070', '\u209F', nameof(UnicodeBlocks.SuperscriptsandSubscripts))]
    [InlineData('\u20A0', '\u20CF', nameof(UnicodeBlocks.CurrencySymbols))]
    [InlineData('\u20D0', '\u20FF', nameof(UnicodeBlocks.CombiningDiacriticalMarksforSymbols))]
    [InlineData('\u2100', '\u214F', nameof(UnicodeBlocks.LetterlikeSymbols))]
    [InlineData('\u2150', '\u218F', nameof(UnicodeBlocks.NumberForms))]
    [InlineData('\u2190', '\u21FF', nameof(UnicodeBlocks.Arrows))]
    [InlineData('\u2200', '\u22FF', nameof(UnicodeBlocks.MathematicalOperators))]
    [InlineData('\u2300', '\u23FF', nameof(UnicodeBlocks.MiscellaneousTechnical))]
    [InlineData('\u2400', '\u243F', nameof(UnicodeBlocks.ControlPictures))]
    [InlineData('\u2440', '\u245F', nameof(UnicodeBlocks.OpticalCharacterRecognition))]
    [InlineData('\u2460', '\u24FF', nameof(UnicodeBlocks.EnclosedAlphanumerics))]
    [InlineData('\u2500', '\u257F', nameof(UnicodeBlocks.BoxDrawing))]
    [InlineData('\u2580', '\u259F', nameof(UnicodeBlocks.BlockElements))]
    [InlineData('\u25A0', '\u25FF', nameof(UnicodeBlocks.GeometricShapes))]
    [InlineData('\u2600', '\u26FF', nameof(UnicodeBlocks.MiscellaneousSymbols))]
    [InlineData('\u2700', '\u27BF', nameof(UnicodeBlocks.Dingbats))]
    [InlineData('\u27C0', '\u27EF', nameof(UnicodeBlocks.MiscellaneousMathematicalSymbolsA))]
    [InlineData('\u27F0', '\u27FF', nameof(UnicodeBlocks.SupplementalArrowsA))]
    [InlineData('\u2800', '\u28FF', nameof(UnicodeBlocks.BraillePatterns))]
    [InlineData('\u2900', '\u297F', nameof(UnicodeBlocks.SupplementalArrowsB))]
    [InlineData('\u2980', '\u29FF', nameof(UnicodeBlocks.MiscellaneousMathematicalSymbolsB))]
    [InlineData('\u2A00', '\u2AFF', nameof(UnicodeBlocks.SupplementalMathematicalOperators))]
    [InlineData('\u2B00', '\u2BFF', nameof(UnicodeBlocks.MiscellaneousSymbolsandArrows))]
    [InlineData('\u2C00', '\u2C5F', nameof(UnicodeBlocks.Glagolitic))]
    [InlineData('\u2C60', '\u2C7F', nameof(UnicodeBlocks.LatinExtendedC))]
    [InlineData('\u2C80', '\u2CFF', nameof(UnicodeBlocks.Coptic))]
    [InlineData('\u2D00', '\u2D2F', nameof(UnicodeBlocks.GeorgianSupplement))]
    [InlineData('\u2D30', '\u2D7F', nameof(UnicodeBlocks.Tifinagh))]
    [InlineData('\u2D80', '\u2DDF', nameof(UnicodeBlocks.EthiopicExtended))]
    [InlineData('\u2DE0', '\u2DFF', nameof(UnicodeBlocks.CyrillicExtendedA))]
    [InlineData('\u2E00', '\u2E7F', nameof(UnicodeBlocks.SupplementalPunctuation))]
    [InlineData('\u2E80', '\u2EFF', nameof(UnicodeBlocks.CJKRadicalsSupplement))]
    [InlineData('\u2F00', '\u2FDF', nameof(UnicodeBlocks.KangxiRadicals))]
    [InlineData('\u2FF0', '\u2FFF', nameof(UnicodeBlocks.IdeographicDescriptionCharacters))]
    [InlineData('\u3000', '\u303F', nameof(UnicodeBlocks.CJKSymbolsandPunctuation))]
    [InlineData('\u3040', '\u309F', nameof(UnicodeBlocks.Hiragana))]
    [InlineData('\u30A0', '\u30FF', nameof(UnicodeBlocks.Katakana))]
    [InlineData('\u3100', '\u312F', nameof(UnicodeBlocks.Bopomofo))]
    [InlineData('\u3130', '\u318F', nameof(UnicodeBlocks.HangulCompatibilityJamo))]
    [InlineData('\u3190', '\u319F', nameof(UnicodeBlocks.Kanbun))]
    [InlineData('\u31A0', '\u31BF', nameof(UnicodeBlocks.BopomofoExtended))]
    [InlineData('\u31C0', '\u31EF', nameof(UnicodeBlocks.CJKStrokes))]
    [InlineData('\u31F0', '\u31FF', nameof(UnicodeBlocks.KatakanaPhoneticExtensions))]
    [InlineData('\u3200', '\u32FF', nameof(UnicodeBlocks.EnclosedCJKLettersandMonths))]
    [InlineData('\u3300', '\u33FF', nameof(UnicodeBlocks.CJKCompatibility))]
    [InlineData('\u3400', '\u4DBF', nameof(UnicodeBlocks.CJKUnifiedIdeographsExtensionA))]
    [InlineData('\u4DC0', '\u4DFF', nameof(UnicodeBlocks.YijingHexagramSymbols))]
    [InlineData('\u4E00', '\u9FFF', nameof(UnicodeBlocks.CJKUnifiedIdeographs))]
    [InlineData('\uA000', '\uA48F', nameof(UnicodeBlocks.YiSyllables))]
    [InlineData('\uA490', '\uA4CF', nameof(UnicodeBlocks.YiRadicals))]
    [InlineData('\uA4D0', '\uA4FF', nameof(UnicodeBlocks.Lisu))]
    [InlineData('\uA500', '\uA63F', nameof(UnicodeBlocks.Vai))]
    [InlineData('\uA640', '\uA69F', nameof(UnicodeBlocks.CyrillicExtendedB))]
    [InlineData('\uA6A0', '\uA6FF', nameof(UnicodeBlocks.Bamum))]
    [InlineData('\uA700', '\uA71F', nameof(UnicodeBlocks.ModifierToneLetters))]
    [InlineData('\uA720', '\uA7FF', nameof(UnicodeBlocks.LatinExtendedD))]
    [InlineData('\uA800', '\uA82F', nameof(UnicodeBlocks.SylotiNagri))]
    [InlineData('\uA830', '\uA83F', nameof(UnicodeBlocks.CommonIndicNumberForms))]
    [InlineData('\uA840', '\uA87F', nameof(UnicodeBlocks.Phagspa))]
    [InlineData('\uA880', '\uA8DF', nameof(UnicodeBlocks.Saurashtra))]
    [InlineData('\uA8E0', '\uA8FF', nameof(UnicodeBlocks.DevanagariExtended))]
    [InlineData('\uA900', '\uA92F', nameof(UnicodeBlocks.KayahLi))]
    [InlineData('\uA930', '\uA95F', nameof(UnicodeBlocks.Rejang))]
    [InlineData('\uA960', '\uA97F', nameof(UnicodeBlocks.HangulJamoExtendedA))]
    [InlineData('\uA980', '\uA9DF', nameof(UnicodeBlocks.Javanese))]
    [InlineData('\uA9E0', '\uA9FF', nameof(UnicodeBlocks.MyanmarExtendedB))]
    [InlineData('\uAA00', '\uAA5F', nameof(UnicodeBlocks.Cham))]
    [InlineData('\uAA60', '\uAA7F', nameof(UnicodeBlocks.MyanmarExtendedA))]
    [InlineData('\uAA80', '\uAADF', nameof(UnicodeBlocks.TaiViet))]
    [InlineData('\uAAE0', '\uAAFF', nameof(UnicodeBlocks.MeeteiMayekExtensions))]
    [InlineData('\uAB00', '\uAB2F', nameof(UnicodeBlocks.EthiopicExtendedA))]
    [InlineData('\uAB30', '\uAB6F', nameof(UnicodeBlocks.LatinExtendedE))]
    [InlineData('\uABC0', '\uABFF', nameof(UnicodeBlocks.MeeteiMayek))]
    [InlineData('\uAC00', '\uD7AF', nameof(UnicodeBlocks.HangulSyllables))]
    [InlineData('\uD7B0', '\uD7FF', nameof(UnicodeBlocks.HangulJamoExtendedB))]
    [InlineData('\uF900', '\uFAFF', nameof(UnicodeBlocks.CJKCompatibilityIdeographs))]
    [InlineData('\uFB00', '\uFB4F', nameof(UnicodeBlocks.AlphabeticPresentationForms))]
    [InlineData('\uFB50', '\uFDFF', nameof(UnicodeBlocks.ArabicPresentationFormsA))]
    [InlineData('\uFE00', '\uFE0F', nameof(UnicodeBlocks.VariationSelectors))]
    [InlineData('\uFE10', '\uFE1F', nameof(UnicodeBlocks.VerticalForms))]
    [InlineData('\uFE20', '\uFE2F', nameof(UnicodeBlocks.CombiningHalfMarks))]
    [InlineData('\uFE30', '\uFE4F', nameof(UnicodeBlocks.CJKCompatibilityForms))]
    [InlineData('\uFE50', '\uFE6F', nameof(UnicodeBlocks.SmallFormVariants))]
    [InlineData('\uFE70', '\uFEFF', nameof(UnicodeBlocks.ArabicPresentationFormsB))]
    [InlineData('\uFF00', '\uFFEF', nameof(UnicodeBlocks.HalfwidthandFullwidthForms))]
    [InlineData('\uFFF0', '\uFFFF', nameof(UnicodeBlocks.Specials))]
    public void Block_Unicode(char first, char last, string blockName)
    {
        // Sanity-check the test data itself before touching the type under test.
        const int lowNibbleMask = 0xF;
        Assert.Equal(0x0, first & lowNibbleMask); // a block is expected to begin at U+nnn0
        Assert.Equal(0xF, last & lowNibbleMask); // ...and to finish at U+nnnF
        Assert.True(first < last); // the span must run low-to-high

        // Resolve the named public static property reflectively.
        const BindingFlags publicStatic = BindingFlags.Public | BindingFlags.Static;
        var blockProperty = typeof(UnicodeBlocks).GetProperty(blockName, publicStatic);
        Assert.NotNull(blockProperty);

        var firstRead = (UnicodeBlock)blockProperty.GetValue(null);
        Assert.NotNull(firstRead);

        // Check 1: the block covers exactly first..last
        Assert.Equal(first, firstRead.FirstCodePoint);
        var lastCodePointInBlock = firstRead.FirstCodePoint + firstRead.BlockSize - 1;
        Assert.Equal(last, lastCodePointInBlock);

        // Check 2: a second read of the property must yield the very same object
        var secondRead = (UnicodeBlock)blockProperty.GetValue(null);
        Assert.Same(firstRead, secondRead);
    }
}
}

View File

@ -16,7 +16,7 @@ namespace Microsoft.Framework.WebEncoders
public void Ctor_WithCustomFilters()
{
// Arrange
var filter = new CodePointFilter(UnicodeBlocks.None).AllowChars("ab").AllowChars('\0', '&', '\uFFFF', 'd');
var filter = new CodePointFilter().AllowChars("ab").AllowChars('\0', '&', '\uFFFF', 'd');
UnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(filter);
// Act & assert
@ -30,10 +30,10 @@ namespace Microsoft.Framework.WebEncoders
}
[Fact]
public void Ctor_WithUnicodeBlocks()
public void Ctor_WithUnicodeRanges()
{
// Arrange
UnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(new CodePointFilter(UnicodeBlocks.Latin1Supplement, UnicodeBlocks.MiscellaneousSymbols));
UnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(new CodePointFilter(UnicodeRanges.Latin1Supplement, UnicodeRanges.MiscellaneousSymbols));
// Act & assert
Assert.Equal("[U+0061]", encoder.Encode("a"));
@ -45,7 +45,7 @@ namespace Microsoft.Framework.WebEncoders
public void Encode_AllRangesAllowed_StillEncodesForbiddenChars_Simple()
{
// Arrange
UnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeBlocks.All);
UnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeRanges.All);
const string input = "Hello <>&\'\"+ there!";
const string expected = "Hello [U+003C][U+003E][U+0026][U+0027][U+0022][U+002B] there!";
@ -57,7 +57,7 @@ namespace Microsoft.Framework.WebEncoders
public void Encode_AllRangesAllowed_StillEncodesForbiddenChars_Extended()
{
// Arrange
UnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeBlocks.All);
UnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeRanges.All);
// Act & assert - BMP chars
for (int i = 0; i <= 0xFFFF; i++)
@ -120,7 +120,7 @@ namespace Microsoft.Framework.WebEncoders
public void Encode_BadSurrogates_ReturnsUnicodeReplacementChar()
{
// Arrange
UnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeBlocks.All); // allow all codepoints
UnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeRanges.All); // allow all codepoints
// "a<unpaired leading>b<unpaired trailing>c<trailing before leading>d<unpaired trailing><valid>e<high at end of string>"
const string input = "a\uD800b\uDFFFc\uDFFF\uD800d\uDFFF\uD800\uDFFFe\uD800";
@ -137,7 +137,7 @@ namespace Microsoft.Framework.WebEncoders
public void Encode_EmptyStringInput_ReturnsEmptyString()
{
// Arrange
UnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeBlocks.All);
UnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeRanges.All);
// Act & assert
Assert.Equal("", encoder.Encode(""));
@ -147,7 +147,7 @@ namespace Microsoft.Framework.WebEncoders
public void Encode_InputDoesNotRequireEncoding_ReturnsOriginalStringInstance()
{
// Arrange
UnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeBlocks.All);
UnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeRanges.All);
string input = "Hello, there!";
// Act & assert
@ -158,7 +158,7 @@ namespace Microsoft.Framework.WebEncoders
public void Encode_NullInput_ReturnsNull()
{
// Arrange
UnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeBlocks.All);
UnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeRanges.All);
// Act & assert
Assert.Null(encoder.Encode(null));
@ -167,25 +167,25 @@ namespace Microsoft.Framework.WebEncoders
[Fact]
public void Encode_WithCharsRequiringEncodingAtBeginning()
{
Assert.Equal("[U+0026]Hello, there!", new CustomUnicodeEncoderBase(UnicodeBlocks.All).Encode("&Hello, there!"));
Assert.Equal("[U+0026]Hello, there!", new CustomUnicodeEncoderBase(UnicodeRanges.All).Encode("&Hello, there!"));
}
[Fact]
public void Encode_WithCharsRequiringEncodingAtEnd()
{
Assert.Equal("Hello, there![U+0026]", new CustomUnicodeEncoderBase(UnicodeBlocks.All).Encode("Hello, there!&"));
Assert.Equal("Hello, there![U+0026]", new CustomUnicodeEncoderBase(UnicodeRanges.All).Encode("Hello, there!&"));
}
[Fact]
public void Encode_WithCharsRequiringEncodingInMiddle()
{
Assert.Equal("Hello, [U+0026]there!", new CustomUnicodeEncoderBase(UnicodeBlocks.All).Encode("Hello, &there!"));
Assert.Equal("Hello, [U+0026]there!", new CustomUnicodeEncoderBase(UnicodeRanges.All).Encode("Hello, &there!"));
}
[Fact]
public void Encode_WithCharsRequiringEncodingInterspersed()
{
Assert.Equal("Hello, [U+003C]there[U+003E]!", new CustomUnicodeEncoderBase(UnicodeBlocks.All).Encode("Hello, <there>!"));
Assert.Equal("Hello, [U+003C]there[U+003E]!", new CustomUnicodeEncoderBase(UnicodeRanges.All).Encode("Hello, <there>!"));
}
[Fact]
@ -222,7 +222,7 @@ namespace Microsoft.Framework.WebEncoders
public void Encode_CharArray_AllCharsValid()
{
// Arrange
CustomUnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeBlocks.All);
CustomUnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeRanges.All);
StringWriter output = new StringWriter();
// Act
@ -236,7 +236,7 @@ namespace Microsoft.Framework.WebEncoders
public void Encode_CharArray_AllCharsInvalid()
{
// Arrange
CustomUnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeBlocks.None);
CustomUnicodeEncoderBase encoder = new CustomUnicodeEncoderBase();
StringWriter output = new StringWriter();
// Act
@ -250,7 +250,7 @@ namespace Microsoft.Framework.WebEncoders
public void Encode_CharArray_SomeCharsValid()
{
// Arrange
CustomUnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeBlocks.All);
CustomUnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeRanges.All);
StringWriter output = new StringWriter();
// Act
@ -294,7 +294,7 @@ namespace Microsoft.Framework.WebEncoders
public void Encode_StringSubstring_AllCharsValid()
{
// Arrange
CustomUnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeBlocks.All);
CustomUnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeRanges.All);
StringWriter output = new StringWriter();
// Act
@ -308,7 +308,7 @@ namespace Microsoft.Framework.WebEncoders
public void Encode_StringSubstring_EntireString_AllCharsValid_ForwardDirectlyToOutput()
{
// Arrange
CustomUnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeBlocks.All);
CustomUnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeRanges.All);
var mockWriter = new Mock<TextWriter>(MockBehavior.Strict);
mockWriter.Setup(o => o.Write("abc")).Verifiable();
@ -323,7 +323,7 @@ namespace Microsoft.Framework.WebEncoders
public void Encode_StringSubstring_AllCharsInvalid()
{
// Arrange
CustomUnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeBlocks.None);
CustomUnicodeEncoderBase encoder = new CustomUnicodeEncoderBase();
StringWriter output = new StringWriter();
// Act
@ -337,7 +337,7 @@ namespace Microsoft.Framework.WebEncoders
public void Encode_StringSubstring_SomeCharsValid()
{
// Arrange
CustomUnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeBlocks.All);
CustomUnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeRanges.All);
StringWriter output = new StringWriter();
// Act
@ -351,7 +351,7 @@ namespace Microsoft.Framework.WebEncoders
public void Encode_StringSubstring_EntireString_SomeCharsValid()
{
// Arrange
CustomUnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeBlocks.All);
CustomUnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeRanges.All);
StringWriter output = new StringWriter();
// Act
@ -392,8 +392,8 @@ namespace Microsoft.Framework.WebEncoders
{
}
public CustomUnicodeEncoderBase(params UnicodeBlock[] allowedBlocks)
: this(new CodePointFilter(allowedBlocks))
public CustomUnicodeEncoderBase(params UnicodeRange[] allowedRanges)
: this(new CodePointFilter(allowedRanges))
{
}

View File

@ -0,0 +1,69 @@
// Copyright (c) Microsoft Open Technologies, Inc. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
using System;
using Xunit;
namespace Microsoft.Framework.WebEncoders
{
/// <summary>
/// Tests for <see cref="UnicodeRange"/>: the two-argument constructor and the
/// <c>FromSpan</c> factory, covering both rejected and accepted inputs.
/// </summary>
public class UnicodeRangeTests
{
    /// <summary>
    /// Each input pair must make the constructor throw an
    /// <see cref="ArgumentOutOfRangeException"/> whose <c>ParamName</c> is "firstCodePoint".
    /// </summary>
    [Theory]
    [InlineData(-1, 16)]
    [InlineData(0x10000, 16)]
    public void Ctor_FailureCase_FirstCodePoint(int firstCodePoint, int rangeSize)
    {
        ArgumentOutOfRangeException thrown = Assert.Throws<ArgumentOutOfRangeException>(
            () => new UnicodeRange(firstCodePoint, rangeSize));

        Assert.Equal("firstCodePoint", thrown.ParamName);
    }

    /// <summary>
    /// Each input pair must make the constructor throw an
    /// <see cref="ArgumentOutOfRangeException"/> whose <c>ParamName</c> is "rangeSize".
    /// </summary>
    [Theory]
    [InlineData(0x0100, -1)]
    [InlineData(0x0100, 0x10000)]
    public void Ctor_FailureCase_RangeSize(int firstCodePoint, int rangeSize)
    {
        ArgumentOutOfRangeException thrown = Assert.Throws<ArgumentOutOfRangeException>(
            () => new UnicodeRange(firstCodePoint, rangeSize));

        Assert.Equal("rangeSize", thrown.ParamName);
    }

    /// <summary>
    /// A range built from a start of U+0100 and a size of 128 reports those same values back.
    /// </summary>
    [Fact]
    public void Ctor_SuccessCase()
    {
        // Arrange - Latin Extended-A
        const int expectedFirstCodePoint = 0x0100;
        const int expectedRangeSize = 128;

        // Act
        UnicodeRange latinExtendedA = new UnicodeRange(expectedFirstCodePoint, expectedRangeSize);

        // Assert
        Assert.Equal(expectedFirstCodePoint, latinExtendedA.FirstCodePoint);
        Assert.Equal(expectedRangeSize, latinExtendedA.RangeSize);
    }

    /// <summary>
    /// Passing U+0020 as the first character and U+0010 as the last must throw an
    /// <see cref="ArgumentOutOfRangeException"/> whose <c>ParamName</c> is "lastChar".
    /// </summary>
    [Fact]
    public void FromSpan_FailureCase()
    {
        ArgumentOutOfRangeException thrown = Assert.Throws<ArgumentOutOfRangeException>(
            () => UnicodeRange.FromSpan('\u0020', '\u0010'));

        Assert.Equal("lastChar", thrown.ParamName);
    }

    /// <summary>
    /// The span U+0180..U+024F yields a range starting at 0x0180 with a size of 208.
    /// </summary>
    [Fact]
    public void FromSpan_SuccessCase()
    {
        // Act - Latin Extended-B
        UnicodeRange latinExtendedB = UnicodeRange.FromSpan('\u0180', '\u024F');

        // Assert
        Assert.Equal(0x0180, latinExtendedB.FirstCodePoint);
        Assert.Equal(208, latinExtendedB.RangeSize);
    }

    /// <summary>
    /// The span U+0000..U+FFFF yields a range starting at 0 with a size of 0x10000.
    /// </summary>
    [Fact]
    public void FromSpan_SuccessCase_All()
    {
        // Act
        UnicodeRange everything = UnicodeRange.FromSpan('\u0000', '\uFFFF');

        // Assert
        Assert.Equal(0, everything.FirstCodePoint);
        Assert.Equal(0x10000, everything.RangeSize);
    }
}
}

View File

@ -0,0 +1,210 @@
// Copyright (c) Microsoft Open Technologies, Inc. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
using System;
using System.Reflection;
using Xunit;
namespace Microsoft.Framework.WebEncoders
{
public class UnicodeRangesTests
{
/// <summary>
/// <c>UnicodeRanges.None</c> is non-null, starts at 0, has size 0, and is the same
/// instance on repeated reads.
/// </summary>
[Fact]
public void Range_None()
{
    var firstRead = UnicodeRanges.None;
    Assert.NotNull(firstRead);

    // Check 1: nothing is covered by this range
    Assert.Equal(0, firstRead.FirstCodePoint);
    Assert.Equal(0, firstRead.RangeSize);

    // Check 2: a second read of the property must yield the very same object
    var secondRead = UnicodeRanges.None;
    Assert.Same(firstRead, secondRead);
}
/// <summary>
/// <c>UnicodeRanges.All</c> must span U+0000..U+FFFF; the check itself is delegated to
/// <c>Range_Unicode</c>.
/// </summary>
[Fact]
public void Range_All()
{
    const char firstChar = '\u0000';
    const char lastChar = '\uFFFF';

    Range_Unicode(firstChar, lastChar, nameof(UnicodeRanges.All));
}
[Theory]
[InlineData('\u0000', '\u007F', nameof(UnicodeRanges.BasicLatin))]
[InlineData('\u0080', '\u00FF', nameof(UnicodeRanges.Latin1Supplement))]
[InlineData('\u0100', '\u017F', nameof(UnicodeRanges.LatinExtendedA))]
[InlineData('\u0180', '\u024F', nameof(UnicodeRanges.LatinExtendedB))]
[InlineData('\u0250', '\u02AF', nameof(UnicodeRanges.IPAExtensions))]
[InlineData('\u02B0', '\u02FF', nameof(UnicodeRanges.SpacingModifierLetters))]
[InlineData('\u0300', '\u036F', nameof(UnicodeRanges.CombiningDiacriticalMarks))]
[InlineData('\u0370', '\u03FF', nameof(UnicodeRanges.GreekandCoptic))]
[InlineData('\u0400', '\u04FF', nameof(UnicodeRanges.Cyrillic))]
[InlineData('\u0500', '\u052F', nameof(UnicodeRanges.CyrillicSupplement))]
[InlineData('\u0530', '\u058F', nameof(UnicodeRanges.Armenian))]
[InlineData('\u0590', '\u05FF', nameof(UnicodeRanges.Hebrew))]
[InlineData('\u0600', '\u06FF', nameof(UnicodeRanges.Arabic))]
[InlineData('\u0700', '\u074F', nameof(UnicodeRanges.Syriac))]
[InlineData('\u0750', '\u077F', nameof(UnicodeRanges.ArabicSupplement))]
[InlineData('\u0780', '\u07BF', nameof(UnicodeRanges.Thaana))]
[InlineData('\u07C0', '\u07FF', nameof(UnicodeRanges.NKo))]
[InlineData('\u0800', '\u083F', nameof(UnicodeRanges.Samaritan))]
[InlineData('\u0840', '\u085F', nameof(UnicodeRanges.Mandaic))]
[InlineData('\u08A0', '\u08FF', nameof(UnicodeRanges.ArabicExtendedA))]
[InlineData('\u0900', '\u097F', nameof(UnicodeRanges.Devanagari))]
[InlineData('\u0980', '\u09FF', nameof(UnicodeRanges.Bengali))]
[InlineData('\u0A00', '\u0A7F', nameof(UnicodeRanges.Gurmukhi))]
[InlineData('\u0A80', '\u0AFF', nameof(UnicodeRanges.Gujarati))]
[InlineData('\u0B00', '\u0B7F', nameof(UnicodeRanges.Oriya))]
[InlineData('\u0B80', '\u0BFF', nameof(UnicodeRanges.Tamil))]
[InlineData('\u0C00', '\u0C7F', nameof(UnicodeRanges.Telugu))]
[InlineData('\u0C80', '\u0CFF', nameof(UnicodeRanges.Kannada))]
[InlineData('\u0D00', '\u0D7F', nameof(UnicodeRanges.Malayalam))]
[InlineData('\u0D80', '\u0DFF', nameof(UnicodeRanges.Sinhala))]
[InlineData('\u0E00', '\u0E7F', nameof(UnicodeRanges.Thai))]
[InlineData('\u0E80', '\u0EFF', nameof(UnicodeRanges.Lao))]
[InlineData('\u0F00', '\u0FFF', nameof(UnicodeRanges.Tibetan))]
[InlineData('\u1000', '\u109F', nameof(UnicodeRanges.Myanmar))]
[InlineData('\u10A0', '\u10FF', nameof(UnicodeRanges.Georgian))]
[InlineData('\u1100', '\u11FF', nameof(UnicodeRanges.HangulJamo))]
[InlineData('\u1200', '\u137F', nameof(UnicodeRanges.Ethiopic))]
[InlineData('\u1380', '\u139F', nameof(UnicodeRanges.EthiopicSupplement))]
[InlineData('\u13A0', '\u13FF', nameof(UnicodeRanges.Cherokee))]
[InlineData('\u1400', '\u167F', nameof(UnicodeRanges.UnifiedCanadianAboriginalSyllabics))]
[InlineData('\u1680', '\u169F', nameof(UnicodeRanges.Ogham))]
[InlineData('\u16A0', '\u16FF', nameof(UnicodeRanges.Runic))]
[InlineData('\u1700', '\u171F', nameof(UnicodeRanges.Tagalog))]
[InlineData('\u1720', '\u173F', nameof(UnicodeRanges.Hanunoo))]
[InlineData('\u1740', '\u175F', nameof(UnicodeRanges.Buhid))]
[InlineData('\u1760', '\u177F', nameof(UnicodeRanges.Tagbanwa))]
[InlineData('\u1780', '\u17FF', nameof(UnicodeRanges.Khmer))]
[InlineData('\u1800', '\u18AF', nameof(UnicodeRanges.Mongolian))]
[InlineData('\u18B0', '\u18FF', nameof(UnicodeRanges.UnifiedCanadianAboriginalSyllabicsExtended))]
[InlineData('\u1900', '\u194F', nameof(UnicodeRanges.Limbu))]
[InlineData('\u1950', '\u197F', nameof(UnicodeRanges.TaiLe))]
[InlineData('\u1980', '\u19DF', nameof(UnicodeRanges.NewTaiLue))]
[InlineData('\u19E0', '\u19FF', nameof(UnicodeRanges.KhmerSymbols))]
[InlineData('\u1A00', '\u1A1F', nameof(UnicodeRanges.Buginese))]
[InlineData('\u1A20', '\u1AAF', nameof(UnicodeRanges.TaiTham))]
[InlineData('\u1AB0', '\u1AFF', nameof(UnicodeRanges.CombiningDiacriticalMarksExtended))]
[InlineData('\u1B00', '\u1B7F', nameof(UnicodeRanges.Balinese))]
[InlineData('\u1B80', '\u1BBF', nameof(UnicodeRanges.Sundanese))]
[InlineData('\u1BC0', '\u1BFF', nameof(UnicodeRanges.Batak))]
[InlineData('\u1C00', '\u1C4F', nameof(UnicodeRanges.Lepcha))]
[InlineData('\u1C50', '\u1C7F', nameof(UnicodeRanges.OlChiki))]
[InlineData('\u1CC0', '\u1CCF', nameof(UnicodeRanges.SundaneseSupplement))]
[InlineData('\u1CD0', '\u1CFF', nameof(UnicodeRanges.VedicExtensions))]
[InlineData('\u1D00', '\u1D7F', nameof(UnicodeRanges.PhoneticExtensions))]
[InlineData('\u1D80', '\u1DBF', nameof(UnicodeRanges.PhoneticExtensionsSupplement))]
[InlineData('\u1DC0', '\u1DFF', nameof(UnicodeRanges.CombiningDiacriticalMarksSupplement))]
[InlineData('\u1E00', '\u1EFF', nameof(UnicodeRanges.LatinExtendedAdditional))]
[InlineData('\u1F00', '\u1FFF', nameof(UnicodeRanges.GreekExtended))]
[InlineData('\u2000', '\u206F', nameof(UnicodeRanges.GeneralPunctuation))]
[InlineData('\u2070', '\u209F', nameof(UnicodeRanges.SuperscriptsandSubscripts))]
[InlineData('\u20A0', '\u20CF', nameof(UnicodeRanges.CurrencySymbols))]
[InlineData('\u20D0', '\u20FF', nameof(UnicodeRanges.CombiningDiacriticalMarksforSymbols))]
[InlineData('\u2100', '\u214F', nameof(UnicodeRanges.LetterlikeSymbols))]
[InlineData('\u2150', '\u218F', nameof(UnicodeRanges.NumberForms))]
[InlineData('\u2190', '\u21FF', nameof(UnicodeRanges.Arrows))]
[InlineData('\u2200', '\u22FF', nameof(UnicodeRanges.MathematicalOperators))]
[InlineData('\u2300', '\u23FF', nameof(UnicodeRanges.MiscellaneousTechnical))]
[InlineData('\u2400', '\u243F', nameof(UnicodeRanges.ControlPictures))]
[InlineData('\u2440', '\u245F', nameof(UnicodeRanges.OpticalCharacterRecognition))]
[InlineData('\u2460', '\u24FF', nameof(UnicodeRanges.EnclosedAlphanumerics))]
[InlineData('\u2500', '\u257F', nameof(UnicodeRanges.BoxDrawing))]
[InlineData('\u2580', '\u259F', nameof(UnicodeRanges.BlockElements))]
[InlineData('\u25A0', '\u25FF', nameof(UnicodeRanges.GeometricShapes))]
[InlineData('\u2600', '\u26FF', nameof(UnicodeRanges.MiscellaneousSymbols))]
[InlineData('\u2700', '\u27BF', nameof(UnicodeRanges.Dingbats))]
[InlineData('\u27C0', '\u27EF', nameof(UnicodeRanges.MiscellaneousMathematicalSymbolsA))]
[InlineData('\u27F0', '\u27FF', nameof(UnicodeRanges.SupplementalArrowsA))]
[InlineData('\u2800', '\u28FF', nameof(UnicodeRanges.BraillePatterns))]
[InlineData('\u2900', '\u297F', nameof(UnicodeRanges.SupplementalArrowsB))]
[InlineData('\u2980', '\u29FF', nameof(UnicodeRanges.MiscellaneousMathematicalSymbolsB))]
[InlineData('\u2A00', '\u2AFF', nameof(UnicodeRanges.SupplementalMathematicalOperators))]
[InlineData('\u2B00', '\u2BFF', nameof(UnicodeRanges.MiscellaneousSymbolsandArrows))]
[InlineData('\u2C00', '\u2C5F', nameof(UnicodeRanges.Glagolitic))]
[InlineData('\u2C60', '\u2C7F', nameof(UnicodeRanges.LatinExtendedC))]
[InlineData('\u2C80', '\u2CFF', nameof(UnicodeRanges.Coptic))]
[InlineData('\u2D00', '\u2D2F', nameof(UnicodeRanges.GeorgianSupplement))]
[InlineData('\u2D30', '\u2D7F', nameof(UnicodeRanges.Tifinagh))]
[InlineData('\u2D80', '\u2DDF', nameof(UnicodeRanges.EthiopicExtended))]
[InlineData('\u2DE0', '\u2DFF', nameof(UnicodeRanges.CyrillicExtendedA))]
[InlineData('\u2E00', '\u2E7F', nameof(UnicodeRanges.SupplementalPunctuation))]
[InlineData('\u2E80', '\u2EFF', nameof(UnicodeRanges.CJKRadicalsSupplement))]
[InlineData('\u2F00', '\u2FDF', nameof(UnicodeRanges.KangxiRadicals))]
[InlineData('\u2FF0', '\u2FFF', nameof(UnicodeRanges.IdeographicDescriptionCharacters))]
[InlineData('\u3000', '\u303F', nameof(UnicodeRanges.CJKSymbolsandPunctuation))]
[InlineData('\u3040', '\u309F', nameof(UnicodeRanges.Hiragana))]
[InlineData('\u30A0', '\u30FF', nameof(UnicodeRanges.Katakana))]
[InlineData('\u3100', '\u312F', nameof(UnicodeRanges.Bopomofo))]
[InlineData('\u3130', '\u318F', nameof(UnicodeRanges.HangulCompatibilityJamo))]
[InlineData('\u3190', '\u319F', nameof(UnicodeRanges.Kanbun))]
[InlineData('\u31A0', '\u31BF', nameof(UnicodeRanges.BopomofoExtended))]
[InlineData('\u31C0', '\u31EF', nameof(UnicodeRanges.CJKStrokes))]
[InlineData('\u31F0', '\u31FF', nameof(UnicodeRanges.KatakanaPhoneticExtensions))]
[InlineData('\u3200', '\u32FF', nameof(UnicodeRanges.EnclosedCJKLettersandMonths))]
[InlineData('\u3300', '\u33FF', nameof(UnicodeRanges.CJKCompatibility))]
[InlineData('\u3400', '\u4DBF', nameof(UnicodeRanges.CJKUnifiedIdeographsExtensionA))]
[InlineData('\u4DC0', '\u4DFF', nameof(UnicodeRanges.YijingHexagramSymbols))]
[InlineData('\u4E00', '\u9FFF', nameof(UnicodeRanges.CJKUnifiedIdeographs))]
[InlineData('\uA000', '\uA48F', nameof(UnicodeRanges.YiSyllables))]
[InlineData('\uA490', '\uA4CF', nameof(UnicodeRanges.YiRadicals))]
[InlineData('\uA4D0', '\uA4FF', nameof(UnicodeRanges.Lisu))]
[InlineData('\uA500', '\uA63F', nameof(UnicodeRanges.Vai))]
[InlineData('\uA640', '\uA69F', nameof(UnicodeRanges.CyrillicExtendedB))]
[InlineData('\uA6A0', '\uA6FF', nameof(UnicodeRanges.Bamum))]
[InlineData('\uA700', '\uA71F', nameof(UnicodeRanges.ModifierToneLetters))]
[InlineData('\uA720', '\uA7FF', nameof(UnicodeRanges.LatinExtendedD))]
[InlineData('\uA800', '\uA82F', nameof(UnicodeRanges.SylotiNagri))]
[InlineData('\uA830', '\uA83F', nameof(UnicodeRanges.CommonIndicNumberForms))]
[InlineData('\uA840', '\uA87F', nameof(UnicodeRanges.Phagspa))]
[InlineData('\uA880', '\uA8DF', nameof(UnicodeRanges.Saurashtra))]
[InlineData('\uA8E0', '\uA8FF', nameof(UnicodeRanges.DevanagariExtended))]
[InlineData('\uA900', '\uA92F', nameof(UnicodeRanges.KayahLi))]
[InlineData('\uA930', '\uA95F', nameof(UnicodeRanges.Rejang))]
[InlineData('\uA960', '\uA97F', nameof(UnicodeRanges.HangulJamoExtendedA))]
[InlineData('\uA980', '\uA9DF', nameof(UnicodeRanges.Javanese))]
[InlineData('\uA9E0', '\uA9FF', nameof(UnicodeRanges.MyanmarExtendedB))]
[InlineData('\uAA00', '\uAA5F', nameof(UnicodeRanges.Cham))]
[InlineData('\uAA60', '\uAA7F', nameof(UnicodeRanges.MyanmarExtendedA))]
[InlineData('\uAA80', '\uAADF', nameof(UnicodeRanges.TaiViet))]
[InlineData('\uAAE0', '\uAAFF', nameof(UnicodeRanges.MeeteiMayekExtensions))]
[InlineData('\uAB00', '\uAB2F', nameof(UnicodeRanges.EthiopicExtendedA))]
[InlineData('\uAB30', '\uAB6F', nameof(UnicodeRanges.LatinExtendedE))]
[InlineData('\uABC0', '\uABFF', nameof(UnicodeRanges.MeeteiMayek))]
[InlineData('\uAC00', '\uD7AF', nameof(UnicodeRanges.HangulSyllables))]
[InlineData('\uD7B0', '\uD7FF', nameof(UnicodeRanges.HangulJamoExtendedB))]
[InlineData('\uF900', '\uFAFF', nameof(UnicodeRanges.CJKCompatibilityIdeographs))]
[InlineData('\uFB00', '\uFB4F', nameof(UnicodeRanges.AlphabeticPresentationForms))]
[InlineData('\uFB50', '\uFDFF', nameof(UnicodeRanges.ArabicPresentationFormsA))]
[InlineData('\uFE00', '\uFE0F', nameof(UnicodeRanges.VariationSelectors))]
[InlineData('\uFE10', '\uFE1F', nameof(UnicodeRanges.VerticalForms))]
[InlineData('\uFE20', '\uFE2F', nameof(UnicodeRanges.CombiningHalfMarks))]
[InlineData('\uFE30', '\uFE4F', nameof(UnicodeRanges.CJKCompatibilityForms))]
[InlineData('\uFE50', '\uFE6F', nameof(UnicodeRanges.SmallFormVariants))]
[InlineData('\uFE70', '\uFEFF', nameof(UnicodeRanges.ArabicPresentationFormsB))]
[InlineData('\uFF00', '\uFFEF', nameof(UnicodeRanges.HalfwidthandFullwidthForms))]
[InlineData('\uFFF0', '\uFFFF', nameof(UnicodeRanges.Specials))]
public void Range_Unicode(char first, char last, string blockName)
{
    // Sanity-check the test data itself before touching the property under test.
    Assert.Equal(0x0, first & 0xF); // every block is expected to start at U+nnn0
    Assert.Equal(0xF, last & 0xF); // every block is expected to end at U+nnnF
    Assert.True(last > first); // the span must be given in ascending order

    // Look up the public static property on UnicodeRanges by name via reflection.
    PropertyInfo property = typeof(UnicodeRanges).GetProperty(blockName, BindingFlags.Public | BindingFlags.Static);
    Assert.NotNull(property);

    UnicodeRange firstRead = (UnicodeRange)property.GetValue(null);
    Assert.NotNull(firstRead);

    // Test 1: the range should cover exactly first..last (inclusive)
    int lastCodePointInRange = firstRead.FirstCodePoint + firstRead.RangeSize - 1;
    Assert.Equal(first, firstRead.FirstCodePoint);
    Assert.Equal(last, lastCodePointInRange);

    // Test 2: reading the property again should return the same range instance
    UnicodeRange secondRead = (UnicodeRange)property.GetValue(null);
    Assert.Same(firstRead, secondRead);
}
}
}

View File

@ -18,7 +18,7 @@ namespace Microsoft.Framework.WebEncoders
public void Ctor_WithCodePointFilter()
{
// Arrange
var filter = new CodePointFilter(UnicodeBlocks.None).AllowChars("ab").AllowChars('\0', '&', '\uFFFF', 'd');
var filter = new CodePointFilter().AllowChars("ab").AllowChars('\0', '&', '\uFFFF', 'd');
UrlEncoder encoder = new UrlEncoder(filter);
// Act & assert
@ -32,10 +32,10 @@ namespace Microsoft.Framework.WebEncoders
}
[Fact]
public void Ctor_WithUnicodeBlocks()
public void Ctor_WithUnicodeRanges()
{
// Arrange
UrlEncoder encoder = new UrlEncoder(UnicodeBlocks.Latin1Supplement, UnicodeBlocks.MiscellaneousSymbols);
UrlEncoder encoder = new UrlEncoder(UnicodeRanges.Latin1Supplement, UnicodeRanges.MiscellaneousSymbols);
// Act & assert
Assert.Equal("%61", encoder.UrlEncode("a"));
@ -59,7 +59,7 @@ namespace Microsoft.Framework.WebEncoders
public void Default_EquivalentToBasicLatin()
{
// Arrange
UrlEncoder controlEncoder = new UrlEncoder(UnicodeBlocks.BasicLatin);
UrlEncoder controlEncoder = new UrlEncoder(UnicodeRanges.BasicLatin);
UrlEncoder testEncoder = UrlEncoder.Default;
// Act & assert
@ -88,7 +88,7 @@ namespace Microsoft.Framework.WebEncoders
public void UrlEncode_AllRangesAllowed_StillEncodesForbiddenChars()
{
// Arrange
UrlEncoder encoder = new UrlEncoder(UnicodeBlocks.All);
UrlEncoder encoder = new UrlEncoder(UnicodeRanges.All);
// Act & assert - BMP chars
for (int i = 0; i <= 0xFFFF; i++)
@ -168,7 +168,7 @@ namespace Microsoft.Framework.WebEncoders
public void UrlEncode_BadSurrogates_ReturnsUnicodeReplacementChar()
{
// Arrange
UrlEncoder encoder = new UrlEncoder(UnicodeBlocks.All); // allow all codepoints
UrlEncoder encoder = new UrlEncoder(UnicodeRanges.All); // allow all codepoints
// "a<unpaired leading>b<unpaired trailing>c<trailing before leading>d<unpaired trailing><valid>e<high at end of string>"
const string input = "a\uD800b\uDFFFc\uDFFF\uD800d\uDFFF\uD800\uDFFFe\uD800";
@ -271,8 +271,8 @@ namespace Microsoft.Framework.WebEncoders
// by never emitting HTML-sensitive characters unescaped.
// Arrange
UrlEncoder urlEncoder = new UrlEncoder(UnicodeBlocks.All);
HtmlEncoder htmlEncoder = new HtmlEncoder(UnicodeBlocks.All);
UrlEncoder urlEncoder = new UrlEncoder(UnicodeRanges.All);
HtmlEncoder htmlEncoder = new HtmlEncoder(UnicodeRanges.All);
// Act & assert
for (int i = 0; i <= 0x10FFFF; i++)

View File

@ -20,22 +20,16 @@ namespace UnicodeTablesGenerator
{
private const string _codePointFiltersGeneratedFormat = @"
/// <summary>
/// Represents the '{0}' Unicode block (U+{1}..U+{2}).
/// A <see cref=""UnicodeRange""/> corresponding to the '{0}' Unicode block (U+{1}..U+{2}).
/// </summary>
/// <remarks>
/// See http://www.unicode.org/charts/PDF/U{1}.pdf for the full set of characters in this block.
/// </remarks>
public static UnicodeBlock {3}
{{
get
{{
return Volatile.Read(ref _{4}) ?? CreateBlock(ref _{4}, first: '\u{1}', last: '\u{2}');
}}
}}
private static UnicodeBlock _{4};
public static UnicodeRange {3} => Volatile.Read(ref _{4}) ?? CreateRange(ref _{4}, first: '\u{1}', last: '\u{2}');
private static UnicodeRange _{4};
";
private const string _codePointFiltersTestsGeneratedFormat = @"[InlineData('\u{1}', '\u{2}', nameof(UnicodeBlocks.{0}))]";
private const string _codePointFiltersTestsGeneratedFormat = @"[InlineData('\u{1}', '\u{2}', nameof(UnicodeRanges.{0}))]";
private static void Main()
{
@ -73,8 +67,8 @@ private static UnicodeBlock _{4};
testCodeBuilder.AppendLine();
}
File.WriteAllText("UnicodeBlocks.generated.txt", runtimeCodeBuilder.ToString());
File.WriteAllText("UnicodeBlocksTests.generated.txt", testCodeBuilder.ToString());
File.WriteAllText("UnicodeRanges.generated.txt", runtimeCodeBuilder.ToString());
File.WriteAllText("UnicodeRangesTests.generated.txt", testCodeBuilder.ToString());
}
private static string RemoveAllNonAlphanumeric(string blockName)