Add unit tests and code generation routines
This commit is contained in:
parent
c5dc9abff6
commit
0ca24147a0
|
|
@ -39,6 +39,8 @@ Project("{8BB2217D-0F2D-49D1-97BC-3654ED321F3B}") = "Microsoft.Net.Http.Headers.
|
|||
EndProject
|
||||
Project("{8BB2217D-0F2D-49D1-97BC-3654ED321F3B}") = "Microsoft.Framework.WebEncoders", "src\Microsoft.Framework.WebEncoders\Microsoft.Framework.WebEncoders.kproj", "{DD2CE416-765E-4000-A03E-C2FF165DA1B6}"
|
||||
EndProject
|
||||
Project("{8BB2217D-0F2D-49D1-97BC-3654ED321F3B}") = "Microsoft.Framework.WebEncoders.Tests", "test\Microsoft.Framework.WebEncoders.Tests\Microsoft.Framework.WebEncoders.Tests.kproj", "{7AE2731D-43CD-4CF8-850A-4914DE2CE930}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Any CPU = Debug|Any CPU
|
||||
|
|
@ -215,6 +217,18 @@ Global
|
|||
{DD2CE416-765E-4000-A03E-C2FF165DA1B6}.Release|Mixed Platforms.Build.0 = Release|Any CPU
|
||||
{DD2CE416-765E-4000-A03E-C2FF165DA1B6}.Release|x86.ActiveCfg = Release|Any CPU
|
||||
{DD2CE416-765E-4000-A03E-C2FF165DA1B6}.Release|x86.Build.0 = Release|Any CPU
|
||||
{7AE2731D-43CD-4CF8-850A-4914DE2CE930}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{7AE2731D-43CD-4CF8-850A-4914DE2CE930}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{7AE2731D-43CD-4CF8-850A-4914DE2CE930}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU
|
||||
{7AE2731D-43CD-4CF8-850A-4914DE2CE930}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU
|
||||
{7AE2731D-43CD-4CF8-850A-4914DE2CE930}.Debug|x86.ActiveCfg = Debug|Any CPU
|
||||
{7AE2731D-43CD-4CF8-850A-4914DE2CE930}.Debug|x86.Build.0 = Debug|Any CPU
|
||||
{7AE2731D-43CD-4CF8-850A-4914DE2CE930}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{7AE2731D-43CD-4CF8-850A-4914DE2CE930}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{7AE2731D-43CD-4CF8-850A-4914DE2CE930}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU
|
||||
{7AE2731D-43CD-4CF8-850A-4914DE2CE930}.Release|Mixed Platforms.Build.0 = Release|Any CPU
|
||||
{7AE2731D-43CD-4CF8-850A-4914DE2CE930}.Release|x86.ActiveCfg = Release|Any CPU
|
||||
{7AE2731D-43CD-4CF8-850A-4914DE2CE930}.Release|x86.Build.0 = Release|Any CPU
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
|
|
@ -236,5 +250,6 @@ Global
|
|||
{60AA2FDB-8121-4826-8D00-9A143FEFAF66} = {A5A15F1C-885A-452A-A731-B0173DDBD913}
|
||||
{E6BB7AD1-BD10-4A23-B780-F4A86ADF00D1} = {F31FF137-390C-49BF-A3BD-7C6ED3597C21}
|
||||
{DD2CE416-765E-4000-A03E-C2FF165DA1B6} = {A5A15F1C-885A-452A-A731-B0173DDBD913}
|
||||
{7AE2731D-43CD-4CF8-850A-4914DE2CE930} = {F31FF137-390C-49BF-A3BD-7C6ED3597C21}
|
||||
EndGlobalSection
|
||||
EndGlobal
|
||||
|
|
|
|||
|
|
@ -25,6 +25,13 @@ namespace Microsoft.Framework.WebEncoders
|
|||
_allowedCharsBitmap[index] |= 0x1U << offset;
|
||||
}
|
||||
|
||||
// Resets the bitmap so that no character is allowed any more
// (every character must subsequently be returned encoded).
public void Clear()
{
    // Zero each backing word in turn; a zero bit means "forbidden".
    for (int wordIndex = 0; wordIndex < _allowedCharsBitmap.Length; wordIndex++)
    {
        _allowedCharsBitmap[wordIndex] = 0;
    }
}
|
||||
|
||||
// Creates a deep copy of this bitmap
|
||||
public AllowedCharsBitmap Clone()
|
||||
{
|
||||
AllowedCharsBitmap retVal;
|
||||
|
|
|
|||
|
|
@ -151,6 +151,18 @@ namespace Microsoft.Framework.WebEncoders
|
|||
return this;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Disallows all characters through the filter, discarding everything that was previously allowed.
|
||||
/// </summary>
|
||||
/// <returns>
|
||||
/// The 'this' instance, so that further calls can be chained onto the result.
|
||||
/// </returns>
|
||||
public CodePointFilter Clear()
|
||||
{
|
||||
// Delegate to the backing bitmap, whose Clear() marks every character as forbidden.
_allowedCharsBitmap.Clear();
|
||||
return this;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Disallows all characters in the specified Unicode character block through the filter.
|
||||
/// </summary>
|
||||
|
|
|
|||
|
|
@ -0,0 +1,7 @@
|
|||
// Copyright (c) Microsoft Open Technologies, Inc. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System;
|
||||
using System.Runtime.CompilerServices;
|
||||
|
||||
[assembly: InternalsVisibleTo("Microsoft.Framework.WebEncoders.Tests")]
|
||||
|
|
@ -0,0 +1,125 @@
|
|||
// Copyright (c) Microsoft Open Technologies, Inc. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System;
|
||||
using Xunit;
|
||||
|
||||
namespace Microsoft.Framework.WebEncoders
|
||||
{
|
||||
public class AllowedCharsBitmapTests
|
||||
{
|
||||
// A freshly constructed bitmap must not allow any character.
[Fact]
public void Ctor_EmptyByDefault()
{
    // Act
    var emptyBitmap = new AllowedCharsBitmap();

    // Assert - walk every UTF-16 code unit and confirm it is forbidden
    int codeUnit = Char.MinValue;
    while (codeUnit <= Char.MaxValue)
    {
        Assert.False(emptyBitmap.IsCharacterAllowed((char)codeUnit));
        codeUnit++;
    }
}
|
||||
|
||||
// Interleaves allow / forbid / allow passes and checks that the last operation
// applied to a given character is the one that wins.
[Fact]
public void Allow_Forbid_ZigZag()
{
    // Arrange
    const int codeUnitCount = Char.MaxValue + 1;
    var zigZagBitmap = new AllowedCharsBitmap();

    // Act
    // Pass 1 allows multiples of 3, pass 2 forbids multiples of 5, pass 3 allows
    // multiples of 7. The net effect: multiples of 3 or 7 are allowed unless they are
    // also multiples of 5, except that multiples of 35 end up allowed again.
    for (int multipleOf3 = 0; multipleOf3 < codeUnitCount; multipleOf3 += 3)
    {
        zigZagBitmap.AllowCharacter((char)multipleOf3);
    }
    for (int multipleOf5 = 0; multipleOf5 < codeUnitCount; multipleOf5 += 5)
    {
        zigZagBitmap.ForbidCharacter((char)multipleOf5);
    }
    for (int multipleOf7 = 0; multipleOf7 < codeUnitCount; multipleOf7 += 7)
    {
        zigZagBitmap.AllowCharacter((char)multipleOf7);
    }

    // Assert
    for (int codeUnit = 0; codeUnit < codeUnitCount; codeUnit++)
    {
        // The pass over multiples of 7 ran last, so it overrides the other two.
        bool expectedAllowed = (codeUnit % 7 == 0)
            || ((codeUnit % 3 == 0) && (codeUnit % 5 != 0));
        Assert.Equal(expectedAllowed, zigZagBitmap.IsCharacterAllowed((char)codeUnit));
    }
}
|
||||
|
||||
// Verifies that Clear() forbids every character, including U+0000.
[Fact]
public void Clear_ForbidsEverything()
{
    // Arrange
    // Allow every UTF-16 code unit first. The loop starts at 0 (not 1) so that the
    // bit for U+0000 is also set and therefore genuinely has to be cleared.
    var bitmap = new AllowedCharsBitmap();
    for (int i = 0; i <= Char.MaxValue; i++)
    {
        bitmap.AllowCharacter((char)i);
    }

    // Act
    bitmap.Clear();

    // Assert
    for (int i = 0; i <= Char.MaxValue; i++)
    {
        Assert.False(bitmap.IsCharacterAllowed((char)i));
    }
}
|
||||
|
||||
// Mutating a clone must not be visible through the bitmap it was cloned from.
[Fact]
public void Clone_MakesDeepCopy()
{
    // Arrange
    var source = new AllowedCharsBitmap();
    source.AllowCharacter('x');

    // Act
    var copy = source.Clone();
    copy.AllowCharacter('y');

    // Assert - the copy carries over 'x' and gains 'y'...
    Assert.True(copy.IsCharacterAllowed('x'));
    Assert.True(copy.IsCharacterAllowed('y'));

    // ...while the source still only knows about 'x'
    Assert.True(source.IsCharacterAllowed('x'));
    Assert.False(source.IsCharacterAllowed('y'));
}
|
||||
|
||||
// ForbidUndefinedCharacters() must only remove characters; it must never add any.
[Fact]
public void ForbidUndefinedCharacters_RemovesUndefinedChars()
{
    // Arrange
    // Only odd-numbered characters are allowed up front. If the implementation
    // overwrote the target instead of merging the two bitmaps, even-numbered
    // defined characters would show up as allowed and the test would fail.
    var oddOnlyBitmap = new AllowedCharsBitmap();
    int oddCodeUnit = 1;
    while (oddCodeUnit <= Char.MaxValue)
    {
        oddOnlyBitmap.AllowCharacter((char)oddCodeUnit);
        oddCodeUnit += 2;
    }

    // Act
    oddOnlyBitmap.ForbidUndefinedCharacters();

    // Assert
    for (int codeUnit = 0; codeUnit <= Char.MaxValue; codeUnit++)
    {
        char current = (char)codeUnit;
        if (codeUnit % 2 != 0)
        {
            // odd: allowed if and only if the character is defined
            Assert.Equal(UnicodeHelpers.IsCharacterDefined(current), oddOnlyBitmap.IsCharacterAllowed(current));
        }
        else
        {
            // even: never allowed in the first place, so still forbidden
            Assert.False(oddOnlyBitmap.IsCharacterAllowed(current));
        }
    }
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,369 @@
|
|||
// Copyright (c) Microsoft Open Technologies, Inc. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using Xunit;
|
||||
|
||||
namespace Microsoft.Framework.WebEncoders
|
||||
{
|
||||
public class CodePointFilterTests
|
||||
{
|
||||
// The parameterless constructor allows exactly U+0000..U+007F.
[Fact]
public void Ctor_Parameterless_DefaultsToBasicLatin()
{
    // Act
    var defaultFilter = new CodePointFilter();

    // Assert
    const int lastBasicLatin = 0x007F;
    for (int codeUnit = 0; codeUnit <= Char.MaxValue; codeUnit++)
    {
        if (codeUnit > lastBasicLatin)
        {
            Assert.False(defaultFilter.IsCharacterAllowed((char)codeUnit));
        }
        else
        {
            Assert.True(defaultFilter.IsCharacterAllowed((char)codeUnit));
        }
    }
}
|
||||
|
||||
// Constructing from an arbitrary ICodePointFilter copies that filter's allowed set.
[Fact]
public void Ctor_OtherCodePointFilterAsInterface()
{
    // Arrange
    var oddOnlyFilter = new OddCodePointFilter();

    // Act
    var copiedFilter = new CodePointFilter(oddOnlyFilter);

    // Assert - only odd code points made it across
    int codeUnit = 0;
    while (codeUnit <= Char.MaxValue)
    {
        bool isOdd = (codeUnit % 2) == 1;
        Assert.Equal(isOdd, copiedFilter.IsCharacterAllowed((char)codeUnit));
        codeUnit++;
    }
}
|
||||
|
||||
// Constructing from another CodePointFilter must copy its state rather than share it.
[Fact]
public void Ctor_OtherCodePointFilterAsConcreteType_Clones()
{
    // Arrange
    var source = new CodePointFilter(UnicodeBlocks.None).AllowChar('x');

    // Act
    var copy = new CodePointFilter(source).AllowChar('y');

    // Assert - the copy sees both characters...
    Assert.True(copy.IsCharacterAllowed('x'));
    Assert.True(copy.IsCharacterAllowed('y'));

    // ...but allowing 'y' on the copy did not leak back into the source
    Assert.True(source.IsCharacterAllowed('x'));
    Assert.False(source.IsCharacterAllowed('y'));
}
|
||||
|
||||
// The block-list constructor allows exactly the characters in the listed blocks.
[Fact]
public void Ctor_UnicodeBlocks()
{
    // Act
    var filter = new CodePointFilter(UnicodeBlocks.LatinExtendedA, UnicodeBlocks.LatinExtendedC);

    // Assert
    for (int codeUnit = 0; codeUnit <= Char.MaxValue; codeUnit++)
    {
        bool inLatinExtendedA = (0x0100 <= codeUnit) && (codeUnit <= 0x017F);
        bool inLatinExtendedC = (0x2C60 <= codeUnit) && (codeUnit <= 0x2C7F);

        if (inLatinExtendedA || inLatinExtendedC)
        {
            Assert.True(filter.IsCharacterAllowed((char)codeUnit));
        }
        else
        {
            Assert.False(filter.IsCharacterAllowed((char)codeUnit));
        }
    }
}
|
||||
|
||||
// AllowBlock() allows one block's characters and returns the same filter for chaining.
[Fact]
public void AllowBlock()
{
    // Arrange
    var emptyFilter = new CodePointFilter(UnicodeBlocks.None);

    // Act
    var chained = emptyFilter.AllowBlock(UnicodeBlocks.LatinExtendedA);

    // Assert
    Assert.Same(emptyFilter, chained); // returns 'this' instance
    for (int codeUnit = 0; codeUnit <= Char.MaxValue; codeUnit++)
    {
        // Latin Extended-A spans U+0100..U+017F
        if (codeUnit < 0x0100 || codeUnit > 0x017F)
        {
            Assert.False(emptyFilter.IsCharacterAllowed((char)codeUnit));
        }
        else
        {
            Assert.True(emptyFilter.IsCharacterAllowed((char)codeUnit));
        }
    }
}
|
||||
|
||||
// AllowBlocks() allows every listed block and returns the same filter for chaining.
[Fact]
public void AllowBlocks()
{
    // Arrange
    var emptyFilter = new CodePointFilter(UnicodeBlocks.None);

    // Act
    var chained = emptyFilter.AllowBlocks(UnicodeBlocks.LatinExtendedA, UnicodeBlocks.LatinExtendedC);

    // Assert
    Assert.Same(emptyFilter, chained); // returns 'this' instance
    for (int codeUnit = 0; codeUnit <= Char.MaxValue; codeUnit++)
    {
        bool inLatinExtendedA = (0x0100 <= codeUnit) && (codeUnit <= 0x017F);
        bool inLatinExtendedC = (0x2C60 <= codeUnit) && (codeUnit <= 0x2C7F);

        if (inLatinExtendedA || inLatinExtendedC)
        {
            Assert.True(emptyFilter.IsCharacterAllowed((char)codeUnit));
        }
        else
        {
            Assert.False(emptyFilter.IsCharacterAllowed((char)codeUnit));
        }
    }
}
|
||||
|
||||
// AllowChar() allows exactly the given character and returns the same filter.
[Fact]
public void AllowChar()
{
    // Arrange
    const char target = '\u0100';
    const char neighbor = '\u0101';
    var basicLatinFilter = new CodePointFilter();

    // Act
    var chained = basicLatinFilter.AllowChar(target);

    // Assert
    Assert.Same(basicLatinFilter, chained); // returns 'this' instance
    Assert.True(basicLatinFilter.IsCharacterAllowed(target));
    Assert.False(basicLatinFilter.IsCharacterAllowed(neighbor));
}
|
||||
|
||||
// AllowChars(params char[]) allows only the listed characters and returns the same filter.
[Fact]
public void AllowChars_Array()
{
    // Arrange
    const char firstAllowed = '\u0100';
    const char firstSkipped = '\u0101';
    const char secondAllowed = '\u0102';
    const char secondSkipped = '\u0103';
    var basicLatinFilter = new CodePointFilter();

    // Act
    var chained = basicLatinFilter.AllowChars(firstAllowed, secondAllowed);

    // Assert
    Assert.Same(basicLatinFilter, chained); // returns 'this' instance
    Assert.True(basicLatinFilter.IsCharacterAllowed(firstAllowed));
    Assert.False(basicLatinFilter.IsCharacterAllowed(firstSkipped));
    Assert.True(basicLatinFilter.IsCharacterAllowed(secondAllowed));
    Assert.False(basicLatinFilter.IsCharacterAllowed(secondSkipped));
}
|
||||
|
||||
// AllowChars(string) allows only the characters in the string and returns the same filter.
[Fact]
public void AllowChars_String()
{
    // Arrange
    var basicLatinFilter = new CodePointFilter();

    // Act
    var chained = basicLatinFilter.AllowChars("\u0100\u0102");

    // Assert
    Assert.Same(basicLatinFilter, chained); // returns 'this' instance

    // the two characters in the string are allowed...
    Assert.True(basicLatinFilter.IsCharacterAllowed('\u0100'));
    Assert.True(basicLatinFilter.IsCharacterAllowed('\u0102'));

    // ...their immediate successors are not
    Assert.False(basicLatinFilter.IsCharacterAllowed('\u0101'));
    Assert.False(basicLatinFilter.IsCharacterAllowed('\u0103'));
}
|
||||
|
||||
// AllowFilter() unions another filter's allowed set into this one.
[Fact]
public void AllowFilter()
{
    // Arrange
    var basicLatinFilter = new CodePointFilter(UnicodeBlocks.BasicLatin);

    // Act
    var chained = basicLatinFilter.AllowFilter(new OddCodePointFilter());

    // Assert
    Assert.Same(basicLatinFilter, chained); // returns 'this' instance
    for (int codeUnit = 0; codeUnit <= Char.MaxValue; codeUnit++)
    {
        if (codeUnit >= 0x0080)
        {
            // outside Basic Latin only the odd code points were added
            Assert.Equal((codeUnit % 2) == 1, basicLatinFilter.IsCharacterAllowed((char)codeUnit));
        }
        else
        {
            // Basic Latin stays fully allowed, odd or even
            Assert.True(basicLatinFilter.IsCharacterAllowed((char)codeUnit));
        }
    }
}
|
||||
|
||||
// Clear() forbids every character and returns the same filter for chaining.
[Fact]
public void Clear()
{
    // Arrange
    var filter = new CodePointFilter();
    int toAllow = 1;
    while (toAllow <= Char.MaxValue)
    {
        filter.AllowChar((char)toAllow);
        toAllow++;
    }

    // Act
    var chained = filter.Clear();

    // Assert
    Assert.Same(filter, chained); // returns 'this' instance
    int toCheck = 0;
    while (toCheck <= Char.MaxValue)
    {
        Assert.False(filter.IsCharacterAllowed((char)toCheck));
        toCheck++;
    }
}
|
||||
|
||||
// ForbidBlock() removes one block's characters and leaves everything else untouched.
[Fact]
public void ForbidBlock()
{
    // Arrange
    var oddFilter = new CodePointFilter(new OddCodePointFilter());

    // Act
    var chained = oddFilter.ForbidBlock(UnicodeBlocks.Specials);

    // Assert
    Assert.Same(oddFilter, chained); // returns 'this' instance
    for (int codeUnit = 0; codeUnit <= Char.MaxValue; codeUnit++)
    {
        if (codeUnit >= 0xFFF0)
        {
            // Specials block (U+FFF0..U+FFFF) is now forbidden entirely
            Assert.False(oddFilter.IsCharacterAllowed((char)codeUnit));
        }
        else
        {
            // everything below keeps the original odd-only pattern
            Assert.Equal((codeUnit % 2) == 1, oddFilter.IsCharacterAllowed((char)codeUnit));
        }
    }
}
|
||||
|
||||
// ForbidBlocks() removes every listed block and leaves everything else untouched.
[Fact]
public void ForbidBlocks()
{
    // Arrange
    var oddFilter = new CodePointFilter(new OddCodePointFilter());

    // Act
    var chained = oddFilter.ForbidBlocks(UnicodeBlocks.BasicLatin, UnicodeBlocks.Specials);

    // Assert
    Assert.Same(oddFilter, chained); // returns 'this' instance
    for (int codeUnit = 0; codeUnit <= Char.MaxValue; codeUnit++)
    {
        bool inBasicLatin = codeUnit <= 0x007F;
        bool inSpecials = codeUnit >= 0xFFF0;

        if (inBasicLatin || inSpecials)
        {
            Assert.False(oddFilter.IsCharacterAllowed((char)codeUnit));
        }
        else
        {
            Assert.Equal((codeUnit % 2) == 1, oddFilter.IsCharacterAllowed((char)codeUnit));
        }
    }
}
|
||||
|
||||
// ForbidChar() forbids exactly the given character and returns the same filter.
[Fact]
public void ForbidChar()
{
    // Arrange
    var basicLatinFilter = new CodePointFilter(UnicodeBlocks.BasicLatin);

    // Act
    var chained = basicLatinFilter.ForbidChar('x');

    // Assert
    Assert.Same(basicLatinFilter, chained); // returns 'this' instance

    // only 'x' was removed...
    Assert.False(basicLatinFilter.IsCharacterAllowed('x'));

    // ...its neighbors are still allowed
    Assert.True(basicLatinFilter.IsCharacterAllowed('w'));
    Assert.True(basicLatinFilter.IsCharacterAllowed('y'));
    Assert.True(basicLatinFilter.IsCharacterAllowed('z'));
}
|
||||
|
||||
// ForbidChars(params char[]) forbids only the listed characters and returns the same filter.
[Fact]
public void ForbidChars_Array()
{
    // Arrange
    var basicLatinFilter = new CodePointFilter(UnicodeBlocks.BasicLatin);

    // Act
    var chained = basicLatinFilter.ForbidChars('x', 'z');

    // Assert
    Assert.Same(basicLatinFilter, chained); // returns 'this' instance

    // the listed characters were removed...
    Assert.False(basicLatinFilter.IsCharacterAllowed('x'));
    Assert.False(basicLatinFilter.IsCharacterAllowed('z'));

    // ...the ones around them were not
    Assert.True(basicLatinFilter.IsCharacterAllowed('w'));
    Assert.True(basicLatinFilter.IsCharacterAllowed('y'));
}
|
||||
|
||||
// ForbidChars(string) forbids only the characters in the string and returns the same filter.
[Fact]
public void ForbidChars_String()
{
    // Arrange
    var basicLatinFilter = new CodePointFilter(UnicodeBlocks.BasicLatin);

    // Act
    var chained = basicLatinFilter.ForbidChars("xz");

    // Assert
    Assert.Same(basicLatinFilter, chained); // returns 'this' instance

    // the characters in the string were removed...
    Assert.False(basicLatinFilter.IsCharacterAllowed('x'));
    Assert.False(basicLatinFilter.IsCharacterAllowed('z'));

    // ...the ones around them were not
    Assert.True(basicLatinFilter.IsCharacterAllowed('w'));
    Assert.True(basicLatinFilter.IsCharacterAllowed('y'));
}
|
||||
|
||||
// GetAllowedCodePoints() enumerates exactly the code points the filter allows.
[Fact]
public void GetAllowedCodePoints()
{
    // Arrange
    // Expected set: all of Basic Latin plus all of Specials, minus 'x'.
    IEnumerable<int> basicLatin = Enumerable.Range(UnicodeBlocks.BasicLatin.FirstCodePoint, UnicodeBlocks.BasicLatin.BlockSize);
    IEnumerable<int> specials = Enumerable.Range(UnicodeBlocks.Specials.FirstCodePoint, UnicodeBlocks.Specials.BlockSize);
    int[] forbidden = new int[] { 'x' };
    int[] expected = basicLatin
        .Concat(specials)
        .Except(forbidden)
        .OrderBy(codePoint => codePoint)
        .ToArray();

    var filter = new CodePointFilter(UnicodeBlocks.BasicLatin, UnicodeBlocks.Specials);
    filter.ForbidChar('x');

    // Act
    // Sorted before comparing so the test does not depend on enumeration order.
    int[] actual = filter.GetAllowedCodePoints()
        .OrderBy(codePoint => codePoint)
        .ToArray();

    // Assert
    Assert.Equal<int>(expected, actual);
}
|
||||
|
||||
// Test double: a code point filter whose allowed set is exactly the odd
// code points in the range 1..Char.MaxValue.
private sealed class OddCodePointFilter : ICodePointFilter
{
    public IEnumerable<int> GetAllowedCodePoints()
    {
        int oddCodePoint = 1;
        while (oddCodePoint <= Char.MaxValue)
        {
            yield return oddCodePoint;
            oddCodePoint += 2;
        }
    }
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,21 @@
|
|||
// Copyright (c) Microsoft Open Technologies, Inc. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System;
|
||||
using Xunit;
|
||||
|
||||
namespace Microsoft.Framework.WebEncoders
|
||||
{
|
||||
public class EncoderCommonTests
|
||||
{
|
||||
// Checks the output-buffer sizing heuristic against known input / expected-capacity pairs.
[Theory]
[InlineData(5000, 3, 15000)] // 5000 * 3 is still below the 16k cap
[InlineData(10000, 3, 16384)] // 10000 * 3 exceeds the cap, so the result is capped at 16k chars
[InlineData(40000, 3, 40000)] // input spills over the LOH: output buffer is as long as the input buffer
[InlineData(512, Int32.MaxValue, 16384)] // the multiplication overflows; must still yield the cap
public void GetCapacityOfOutputStringBuilder(int numCharsToEncode, int worstCaseOutputCharsPerInputChar, int expectedResult)
{
    // Act
    var actualResult = EncoderCommon.GetCapacityOfOutputStringBuilder(numCharsToEncode, worstCaseOutputCharsPerInputChar);

    // Assert
    Assert.Equal(expectedResult, actualResult);
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,72 @@
|
|||
// Copyright (c) Microsoft Open Technologies, Inc. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System;
|
||||
using System.IO;
|
||||
using Xunit;
|
||||
|
||||
namespace Microsoft.Framework.WebEncoders
|
||||
{
|
||||
public class EncoderExtensionsTests
|
||||
{
|
||||
// The HtmlEncode extension method rejects a null encoder.
[Fact]
public void HtmlEncode_ParameterChecks()
{
    // Arrange
    Action encodeWithNullEncoder = () => EncoderExtensions.HtmlEncode(null, "Hello!", new StringWriter());

    // Act & assert
    Assert.Throws<ArgumentNullException>(encodeWithNullEncoder);
}
|
||||
|
||||
// The HtmlEncode extension method writes the encoded form of the input to the writer.
[Fact]
public void HtmlEncode_PositiveTestCase()
{
    // Arrange
    IHtmlEncoder encoder = new HtmlEncoder(UnicodeBlocks.All);
    StringWriter writer = new StringWriter();

    // Act
    encoder.HtmlEncode("Hello+there!", writer);

    // Assert
    // '+' is always encoded, even when every Unicode block is allowed, so the
    // expected output must contain the hex character reference rather than a raw '+'.
    Assert.Equal("Hello&#x2B;there!", writer.ToString());
}
|
||||
|
||||
// The JavaScriptStringEncode extension method rejects a null encoder.
[Fact]
public void JavaScriptStringEncode_ParameterChecks()
{
    // Arrange
    Action encodeWithNullEncoder = () => EncoderExtensions.JavaScriptStringEncode(null, "Hello!", new StringWriter());

    // Act & assert
    Assert.Throws<ArgumentNullException>(encodeWithNullEncoder);
}
|
||||
|
||||
// The JavaScriptStringEncode extension method writes the encoded form of the input to the writer.
[Fact]
public void JavaScriptStringEncode_PositiveTestCase()
{
    // Arrange
    var output = new StringWriter();
    IJavaScriptStringEncoder jsEncoder = new JavaScriptStringEncoder(UnicodeBlocks.All);

    // Act
    jsEncoder.JavaScriptStringEncode("Hello+there!", output);
    string encoded = output.ToString();

    // Assert - '+' comes out as a \uXXXX escape
    Assert.Equal(@"Hello\u002Bthere!", encoded);
}
|
||||
|
||||
// The UrlEncode extension method rejects a null encoder.
[Fact]
public void UrlEncode_ParameterChecks()
{
    // Arrange
    Action encodeWithNullEncoder = () => EncoderExtensions.UrlEncode(null, "Hello!", new StringWriter());

    // Act & assert
    Assert.Throws<ArgumentNullException>(encodeWithNullEncoder);
}
|
||||
|
||||
// The UrlEncode extension method writes the encoded form of the input to the writer.
[Fact]
public void UrlEncode_PositiveTestCase()
{
    // Arrange
    var output = new StringWriter();
    IUrlEncoder urlEncoder = new UrlEncoder(UnicodeBlocks.All);

    // Act
    urlEncoder.UrlEncode("Hello+there!", output);
    string encoded = output.ToString();

    // Assert - '+' comes out percent-encoded
    Assert.Equal("Hello%2Bthere!", encoded);
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,38 @@
|
|||
// Copyright (c) Microsoft Open Technologies, Inc. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using Newtonsoft.Json;
|
||||
|
||||
namespace Microsoft.Framework.WebEncoders
|
||||
{
|
||||
// Loads entity definitions from "entities.json" and exposes those whose key has the
// complete "&name;" form.
internal static class Entities
{
    // Parsed entities keyed by their "&name;" text; populated once when the type is initialized.
    public static readonly IDictionary<string, ParsedEntity> ParsedEntities = GetParsedEntities();

    // Reads and deserializes the JSON file, then drops every key not of the form "&entity;".
    private static IDictionary<string, ParsedEntity> GetParsedEntities()
    {
        // read all entries; both readers are disposed as soon as deserialization is done
        string allEntitiesText = ReadEntitiesJsonFile();
        IDictionary<string, ParsedEntity> deserializedRawData;
        using (var stringReader = new StringReader(allEntitiesText))
        using (var jsonReader = new JsonTextReader(stringReader))
        {
            deserializedRawData = new JsonSerializer().Deserialize<IDictionary<string, ParsedEntity>>(jsonReader);
        }

        // strip out all entries which aren't of the form "&entity;"
        foreach (var key in deserializedRawData.Keys.ToArray() /* dupe since we're mutating original structure */)
        {
            if (!key.StartsWith("&", StringComparison.Ordinal) || !key.EndsWith(";", StringComparison.Ordinal))
            {
                deserializedRawData.Remove(key);
            }
        }
        return deserializedRawData;
    }

    // Returns the full text of "entities.json", resolved relative to the current directory.
    private static string ReadEntitiesJsonFile()
    {
        return File.ReadAllText("entities.json");
    }
}
|
||||
}
|
||||
|
|
@ -0,0 +1,27 @@
|
|||
// Copyright (c) Microsoft Open Technologies, Inc. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
|
||||
namespace Microsoft.Framework.WebEncoders
|
||||
{
|
||||
/// <summary>
/// Helper extension methods for <see cref="TextReader"/>.
/// </summary>
public static class Extensions
{
    /// <summary>
    /// Reads every remaining line from <paramref name="reader"/> into an array.
    /// </summary>
    /// <param name="reader">The reader to consume; it is read until ReadLine() returns null.</param>
    /// <returns>The lines in the order they were read; empty if the reader had no more lines.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
    public static string[] ReadAllLines(this TextReader reader)
    {
        // Fail eagerly with a descriptive exception instead of a NullReferenceException
        // raised from inside the iterator.
        if (reader == null)
        {
            throw new ArgumentNullException("reader");
        }

        return ReadAllLinesImpl(reader).ToArray();
    }

    // Lazily yields one line at a time until the reader reports end of input.
    private static IEnumerable<string> ReadAllLinesImpl(TextReader reader)
    {
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            yield return line;
        }
    }
}
|
||||
}
|
||||
|
|
@ -0,0 +1,269 @@
|
|||
// Copyright (c) Microsoft Open Technologies, Inc. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System;
|
||||
using System.Globalization;
|
||||
using System.IO;
|
||||
using Xunit;
|
||||
|
||||
namespace Microsoft.Framework.WebEncoders
|
||||
{
|
||||
public class HtmlEncoderTests
|
||||
{
|
||||
// An encoder built from a code point filter passes through the filter's allowed
// characters, except for those the encoder always encodes regardless of the filter.
[Fact]
public void Ctor_WithCodePointFilter()
{
    // Arrange
    var filter = new CodePointFilter(UnicodeBlocks.None).AllowChars("ab").AllowChars('\0', '&', '\uFFFF', 'd');
    HtmlEncoder encoder = new HtmlEncoder(filter);

    // Act & assert
    Assert.Equal("a", encoder.HtmlEncode("a"));
    Assert.Equal("b", encoder.HtmlEncode("b"));
    Assert.Equal("&#x63;", encoder.HtmlEncode("c")); // 'c' was never allowed by the filter
    Assert.Equal("d", encoder.HtmlEncode("d"));
    Assert.Equal("&#x0;", encoder.HtmlEncode("\0")); // we still always encode control chars
    Assert.Equal("&amp;", encoder.HtmlEncode("&")); // we still always encode HTML-special chars
    Assert.Equal("&#xFFFF;", encoder.HtmlEncode("\uFFFF")); // we still always encode non-chars and other forbidden chars
}
|
||||
|
||||
// An encoder built from a list of Unicode blocks passes through only characters in those blocks.
[Fact]
public void Ctor_WithUnicodeBlocks()
{
    // Arrange
    HtmlEncoder encoder = new HtmlEncoder(UnicodeBlocks.Latin1Supplement, UnicodeBlocks.MiscellaneousSymbols);

    // Act & assert
    Assert.Equal("&#x61;", encoder.HtmlEncode("a")); // Basic Latin was not in the allowed list
    Assert.Equal("\u00E9", encoder.HtmlEncode("\u00E9" /* LATIN SMALL LETTER E WITH ACUTE */));
    Assert.Equal("\u2601", encoder.HtmlEncode("\u2601" /* CLOUD */));
}
|
||||
|
||||
// The parameterless constructor allows Basic Latin only; everything else is encoded.
[Fact]
public void Ctor_WithNoParameters_DefaultsToBasicLatin()
{
    // Arrange
    HtmlEncoder encoder = new HtmlEncoder();

    // Act & assert
    Assert.Equal("a", encoder.HtmlEncode("a"));
    Assert.Equal("&#xE9;", encoder.HtmlEncode("\u00E9" /* LATIN SMALL LETTER E WITH ACUTE */));
    Assert.Equal("&#x2601;", encoder.HtmlEncode("\u2601" /* CLOUD */));
}
|
||||
|
||||
// HtmlEncoder.Default must encode every non-surrogate BMP character exactly like
// an encoder explicitly constructed for Basic Latin.
[Fact]
public void Default_EquivalentToBasicLatin()
{
    // Arrange
    HtmlEncoder basicLatinEncoder = new HtmlEncoder(UnicodeBlocks.BasicLatin);
    HtmlEncoder defaultEncoder = HtmlEncoder.Default;

    // Act & assert
    for (int codePoint = 0; codePoint <= Char.MaxValue; codePoint++)
    {
        // lone surrogates are not part of this comparison
        if (IsSurrogateCodePoint(codePoint))
        {
            continue;
        }

        string singleChar = new String((char)codePoint, 1);
        string expected = basicLatinEncoder.HtmlEncode(singleChar);
        string actual = defaultEncoder.HtmlEncode(singleChar);
        Assert.Equal(expected, actual);
    }
}
|
||||
|
||||
// Repeated reads of HtmlEncoder.Default must hand back the same object.
[Fact]
public void Default_ReturnsSingletonInstance()
{
    // Act
    HtmlEncoder firstRead = HtmlEncoder.Default;
    HtmlEncoder secondRead = HtmlEncoder.Default;

    // Assert - reference equality, not just value equality
    Assert.Same(firstRead, secondRead);
}
|
||||
|
||||
// Even with every Unicode block allowed, HTML-sensitive characters must be encoded.
// The four characters with a named branch in the _Extended test use named entities;
// apostrophe and plus use hex character references.
[Theory]
[InlineData("<", "&lt;")]
[InlineData(">", "&gt;")]
[InlineData("&", "&amp;")]
[InlineData("'", "&#x27;")]
[InlineData("\"", "&quot;")]
[InlineData("+", "&#x2B;")]
public void HtmlEncode_AllRangesAllowed_StillEncodesForbiddenChars_Simple(string input, string expected)
{
    // Arrange
    HtmlEncoder encoder = new HtmlEncoder(UnicodeBlocks.All);

    // Act
    string retVal = encoder.HtmlEncode(input);

    // Assert
    Assert.Equal(expected, retVal);
}
|
||||
|
||||
// Exhaustively checks every Unicode code point against an encoder that allows all
// blocks: forbidden characters must still be encoded, everything else passes through.
[Fact]
public void HtmlEncode_AllRangesAllowed_StillEncodesForbiddenChars_Extended()
{
    // Arrange
    HtmlEncoder encoder = new HtmlEncoder(UnicodeBlocks.All);

    // Act & assert - BMP chars
    for (int i = 0; i <= 0xFFFF; i++)
    {
        string input = new String((char)i, 1);
        string expected;
        if (IsSurrogateCodePoint(i))
        {
            expected = "\uFFFD"; // unpaired surrogate -> Unicode replacement char
        }
        else
        {
            // These four characters are expected as named entities.
            if (input == "<") { expected = "&lt;"; }
            else if (input == ">") { expected = "&gt;"; }
            else if (input == "&") { expected = "&amp;"; }
            else if (input == "\"") { expected = "&quot;"; }
            else
            {
                bool mustEncode = false;
                if (i == '\'' || i == '+')
                {
                    mustEncode = true; // apostrophe, plus
                }
                else if (i <= 0x001F || (0x007F <= i && i <= 0x9F))
                {
                    mustEncode = true; // control char
                }
                else if (!UnicodeHelpers.IsCharacterDefined((char)i))
                {
                    mustEncode = true; // undefined (or otherwise disallowed) char
                }

                if (mustEncode)
                {
                    // hex character reference, uppercase digits, no zero padding
                    expected = String.Format(CultureInfo.InvariantCulture, "&#x{0:X};", i);
                }
                else
                {
                    expected = input; // no encoding
                }
            }
        }

        string retVal = encoder.HtmlEncode(input);
        Assert.Equal(expected, retVal);
    }

    // Act & assert - astral chars
    // Supplementary-plane code points are always expected as hex character references.
    for (int i = 0x10000; i <= 0x10FFFF; i++)
    {
        string input = Char.ConvertFromUtf32(i);
        string expected = String.Format(CultureInfo.InvariantCulture, "&#x{0:X};", i);
        string retVal = encoder.HtmlEncode(input);
        Assert.Equal(expected, retVal);
    }
}
|
||||
|
||||
// Unpaired or mis-ordered surrogates are replaced with U+FFFD, while a valid
// surrogate pair is emitted as the hex reference of the code point it encodes.
[Fact]
public void HtmlEncode_BadSurrogates_ReturnsUnicodeReplacementChar()
{
    // Arrange
    HtmlEncoder encoder = new HtmlEncoder(UnicodeBlocks.All); // allow all codepoints

    // "a<unpaired leading>b<unpaired trailing>c<trailing before leading>d<unpaired trailing><valid>e<high at end of string>"
    const string input = "a\uD800b\uDFFFc\uDFFF\uD800d\uDFFF\uD800\uDFFFe\uD800";
    // The valid pair U+D800 U+DFFF is code point U+103FF, which must appear encoded.
    const string expected = "a\uFFFDb\uFFFDc\uFFFD\uFFFDd\uFFFD&#x103FF;e\uFFFD";

    // Act
    string retVal = encoder.HtmlEncode(input);

    // Assert
    Assert.Equal(expected, retVal);
}
|
||||
|
||||
// Encoding the empty string yields the empty string.
[Fact]
public void HtmlEncode_EmptyStringInput_ReturnsEmptyString()
{
    // Arrange
    HtmlEncoder defaultEncoder = new HtmlEncoder();

    // Act
    string encoded = defaultEncoder.HtmlEncode("");

    // Assert
    Assert.Equal("", encoded);
}
|
||||
|
||||
// When nothing needs encoding, the very same string object is handed back.
[Fact]
public void HtmlEncode_InputDoesNotRequireEncoding_ReturnsOriginalStringInstance()
{
    // Arrange
    string safeText = "Hello, there!";
    HtmlEncoder defaultEncoder = new HtmlEncoder();

    // Act
    string encoded = defaultEncoder.HtmlEncode(safeText);

    // Assert - reference equality, not just equal content
    Assert.Same(safeText, encoded);
}
|
||||
|
||||
// A null input string is passed through as null rather than throwing.
[Fact]
public void HtmlEncode_NullInput_ReturnsNull()
{
    // Arrange
    HtmlEncoder defaultEncoder = new HtmlEncoder();

    // Act
    string encoded = defaultEncoder.HtmlEncode(null);

    // Assert
    Assert.Null(encoded);
}
|
||||
|
||||
// A character that needs encoding at the very start of the input is encoded.
[Fact]
public void HtmlEncode_WithCharsRequiringEncodingAtBeginning()
{
    // The leading '&' must come out as the named entity; the rest is unchanged.
    Assert.Equal("&amp;Hello, there!", new HtmlEncoder().HtmlEncode("&Hello, there!"));
}
|
||||
|
||||
// A character that needs encoding at the very end of the input is encoded.
[Fact]
public void HtmlEncode_WithCharsRequiringEncodingAtEnd()
{
    // The trailing '&' must come out as the named entity; the rest is unchanged.
    Assert.Equal("Hello, there!&amp;", new HtmlEncoder().HtmlEncode("Hello, there!&"));
}
|
||||
|
||||
// A character that needs encoding in the middle of the input is encoded.
[Fact]
public void HtmlEncode_WithCharsRequiringEncodingInMiddle()
{
    // The embedded '&' must come out as the named entity; the rest is unchanged.
    Assert.Equal("Hello, &amp;there!", new HtmlEncoder().HtmlEncode("Hello, &there!"));
}
|
||||
|
||||
// Several characters needing encoding, separated by safe text, are each encoded.
[Fact]
public void HtmlEncode_WithCharsRequiringEncodingInterspersed()
{
    // '<' and '>' must come out as named entities; the text between them is unchanged.
    Assert.Equal("Hello, &lt;there&gt;!", new HtmlEncoder().HtmlEncode("Hello, <there>!"));
}
|
||||
|
||||
// The char[] overload encodes only the requested slice and writes it to the writer.
[Fact]
public void HtmlEncode_CharArray()
{
    // Arrange
    HtmlEncoder encoder = new HtmlEncoder();
    var output = new StringWriter();

    // Act
    // start index 3, count 5 selects "lo+wo"
    encoder.HtmlEncode("Hello+world!".ToCharArray(), 3, 5, output);

    // Assert
    // '+' is always encoded, so it must appear as a hex character reference.
    Assert.Equal("lo&#x2B;wo", output.ToString());
}
|
||||
|
||||
[Fact]
public void HtmlEncode_StringSubstring()
{
    // Arrange
    HtmlEncoder encoder = new HtmlEncoder();
    var output = new StringWriter();

    // Act - encode only the five chars starting at index 3, i.e. "lo+wo"
    encoder.HtmlEncode("Hello+world!", 3, 5, output);

    // Assert - '+' is one of the characters these encoders never emit raw,
    // so it must be written to the output as a hex character reference.
    Assert.Equal("lo&#x2B;wo", output.ToString());
}
|
||||
|
||||
// Returns true when the code point lies in the UTF-16 surrogate range U+D800..U+DFFF (inclusive).
private static bool IsSurrogateCodePoint(int codePoint)
{
    if (codePoint < 0xD800)
    {
        return false;
    }

    return codePoint <= 0xDFFF;
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,331 @@
|
|||
// Copyright (c) Microsoft Open Technologies, Inc. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System;
|
||||
using System.Globalization;
|
||||
using System.IO;
|
||||
using Xunit;
|
||||
|
||||
namespace Microsoft.Framework.WebEncoders
|
||||
{
|
||||
public class JavaScriptStringEncoderTests
|
||||
{
|
||||
[Fact]
public void Ctor_WithCodePointFilter()
{
    // Arrange - start from an empty allow-list, then explicitly allow a few
    // characters, including some that the encoder is expected to reject anyway
    var allowList = new CodePointFilter(UnicodeBlocks.None).AllowChars("ab").AllowChars('\0', '&', '\uFFFF', 'd');
    var jsEncoder = new JavaScriptStringEncoder(allowList);

    // Act & assert - explicitly allowed characters pass through untouched
    Assert.Equal("a", jsEncoder.JavaScriptStringEncode("a"));
    Assert.Equal("b", jsEncoder.JavaScriptStringEncode("b"));

    // 'c' was never allowed, so it is escaped
    Assert.Equal(@"\u0063", jsEncoder.JavaScriptStringEncode("c"));
    Assert.Equal("d", jsEncoder.JavaScriptStringEncode("d"));

    // The filter cannot override the always-encode rules:
    Assert.Equal(@"\u0000", jsEncoder.JavaScriptStringEncode("\0")); // control chars
    Assert.Equal(@"\u0026", jsEncoder.JavaScriptStringEncode("&")); // HTML-special chars
    Assert.Equal(@"\uFFFF", jsEncoder.JavaScriptStringEncode("\uFFFF")); // non-chars and other forbidden chars
}
|
||||
|
||||
[Fact]
public void Ctor_WithUnicodeBlocks()
{
    // Arrange - only these two blocks are allowed; Basic Latin is not among them
    var jsEncoder = new JavaScriptStringEncoder(UnicodeBlocks.Latin1Supplement, UnicodeBlocks.MiscellaneousSymbols);

    // Act
    string basicLatin = jsEncoder.JavaScriptStringEncode("a");
    string latin1 = jsEncoder.JavaScriptStringEncode("\u00E9" /* LATIN SMALL LETTER E WITH ACUTE */);
    string miscSymbol = jsEncoder.JavaScriptStringEncode("\u2601" /* CLOUD */);

    // Assert - the character outside the allowed blocks is escaped, the others are not
    Assert.Equal(@"\u0061", basicLatin);
    Assert.Equal("\u00E9", latin1);
    Assert.Equal("\u2601", miscSymbol);
}
|
||||
|
||||
[Fact]
public void Ctor_WithNoParameters_DefaultsToBasicLatin()
{
    // Arrange
    var jsEncoder = new JavaScriptStringEncoder();

    // Act
    string basicLatin = jsEncoder.JavaScriptStringEncode("a");
    string latin1 = jsEncoder.JavaScriptStringEncode("\u00E9" /* LATIN SMALL LETTER E WITH ACUTE */);
    string miscSymbol = jsEncoder.JavaScriptStringEncode("\u2601" /* CLOUD */);

    // Assert - only the Basic Latin character survives unescaped
    Assert.Equal("a", basicLatin);
    Assert.Equal(@"\u00E9", latin1);
    Assert.Equal(@"\u2601", miscSymbol);
}
|
||||
|
||||
[Fact]
public void Default_EquivalentToBasicLatin()
{
    // Arrange - an encoder explicitly restricted to Basic Latin serves as the reference
    var reference = new JavaScriptStringEncoder(UnicodeBlocks.BasicLatin);
    JavaScriptStringEncoder underTest = JavaScriptStringEncoder.Default;

    // Act & assert - both encoders must agree on every non-surrogate UTF-16 code unit
    for (int codeUnit = 0; codeUnit <= Char.MaxValue; codeUnit++)
    {
        if (IsSurrogateCodePoint(codeUnit))
        {
            continue;
        }

        string input = new String((char)codeUnit, 1);
        string expected = reference.JavaScriptStringEncode(input);
        string actual = underTest.JavaScriptStringEncode(input);
        Assert.Equal(expected, actual);
    }
}
|
||||
|
||||
[Fact]
public void Default_ReturnsSingletonInstance()
{
    // Act - read the static property twice
    JavaScriptStringEncoder firstRead = JavaScriptStringEncoder.Default;
    JavaScriptStringEncoder secondRead = JavaScriptStringEncoder.Default;

    // Assert - both reads must yield the same object
    Assert.Same(firstRead, secondRead);
}
|
||||
|
||||
[Theory]
[InlineData("<", @"\u003C")]
[InlineData(">", @"\u003E")]
[InlineData("&", @"\u0026")]
[InlineData("'", @"\u0027")]
[InlineData("\"", @"\u0022")]
[InlineData("+", @"\u002B")]
[InlineData("\\", @"\\")]
[InlineData("/", @"\/")]
[InlineData("\b", @"\b")]
[InlineData("\f", @"\f")]
[InlineData("\n", @"\n")]
[InlineData("\t", @"\t")]
[InlineData("\r", @"\r")]
public void JavaScriptStringEncode_AllRangesAllowed_StillEncodesForbiddenChars_Simple(string input, string expected)
{
    // Arrange - allow every block so that only the always-escape rules are exercised
    var permissiveEncoder = new JavaScriptStringEncoder(UnicodeBlocks.All);

    // Act & assert
    Assert.Equal(expected, permissiveEncoder.JavaScriptStringEncode(input));
}
|
||||
|
||||
[Fact]
public void JavaScriptStringEncode_AllRangesAllowed_StillEncodesForbiddenChars_Extended()
{
    // Arrange - allow every block so that only the always-escape rules are exercised
    var permissiveEncoder = new JavaScriptStringEncoder(UnicodeBlocks.All);

    // Act & assert - BMP: compute the expected output for every UTF-16 code unit
    for (int codeUnit = 0; codeUnit <= 0xFFFF; codeUnit++)
    {
        string input = new String((char)codeUnit, 1);
        string expected;

        if (IsSurrogateCodePoint(codeUnit))
        {
            // a lone surrogate is replaced by the Unicode replacement char
            expected = "\uFFFD";
        }
        else
        {
            switch (codeUnit)
            {
                // characters with a dedicated short escape
                case '\b': expected = @"\b"; break;
                case '\t': expected = @"\t"; break;
                case '\n': expected = @"\n"; break;
                case '\f': expected = @"\f"; break;
                case '\r': expected = @"\r"; break;
                case '\\': expected = @"\\"; break;
                case '/': expected = @"\/"; break;

                default:
                    bool isHtmlSensitive =
                        codeUnit == '<' || codeUnit == '>' || codeUnit == '&' ||
                        codeUnit == '\"' || codeUnit == '\'' || codeUnit == '+';
                    bool isControl = codeUnit <= 0x001F || (0x007F <= codeUnit && codeUnit <= 0x9F);

                    // undefined (or otherwise disallowed) chars are only probed for non-control chars
                    bool isControlOrUndefined = isControl || !UnicodeHelpers.IsCharacterDefined((char)codeUnit);

                    expected = (isHtmlSensitive || isControlOrUndefined)
                        ? String.Format(CultureInfo.InvariantCulture, @"\u{0:X4}", codeUnit)
                        : input; // no encoding
                    break;
            }
        }

        Assert.Equal(expected, permissiveEncoder.JavaScriptStringEncode(input));
    }

    // Act & assert - astral chars: each is expected as two \u escapes, one per surrogate half
    for (int codePoint = 0x10000; codePoint <= 0x10FFFF; codePoint++)
    {
        string input = Char.ConvertFromUtf32(codePoint);
        uint highHalf = input[0];
        uint lowHalf = input[1];
        string expected = String.Format(CultureInfo.InvariantCulture, @"\u{0:X4}\u{1:X4}", highHalf, lowHalf);

        Assert.Equal(expected, permissiveEncoder.JavaScriptStringEncode(input));
    }
}
|
||||
|
||||
[Fact]
public void JavaScriptStringEncode_BadSurrogates_ReturnsUnicodeReplacementChar()
{
    // Arrange - all code points allowed, so only surrogate handling affects the output
    var permissiveEncoder = new JavaScriptStringEncoder(UnicodeBlocks.All);

    // The input interleaves plain letters with, in order:
    //   a lone leading surrogate, a lone trailing surrogate,
    //   a trailing surrogate followed by a leading one (wrong order),
    //   a lone trailing surrogate immediately followed by a valid pair,
    //   and a leading surrogate as the final char of the string.
    const string input = "a\uD800b\uDFFFc\uDFFF\uD800d\uDFFF\uD800\uDFFFe\uD800";

    // Only the valid D800/DFFF pair is kept (as two \u escapes); every other surrogate becomes U+FFFD.
    const string expected = "a\uFFFDb\uFFFDc\uFFFD\uFFFDd\uFFFD\\uD800\\uDFFFe\uFFFD";

    // Act & assert
    Assert.Equal(expected, permissiveEncoder.JavaScriptStringEncode(input));
}
|
||||
|
||||
[Fact]
public void JavaScriptStringEncode_EmptyStringInput_ReturnsEmptyString()
{
    // Arrange
    var jsEncoder = new JavaScriptStringEncoder();

    // Act
    string encoded = jsEncoder.JavaScriptStringEncode("");

    // Assert - an empty input has nothing to encode
    Assert.Equal("", encoded);
}
|
||||
|
||||
[Fact]
public void JavaScriptStringEncode_InputDoesNotRequireEncoding_ReturnsOriginalStringInstance()
{
    // Arrange
    var jsEncoder = new JavaScriptStringEncoder();
    string unchangedInput = "Hello, there!";

    // Act
    string encoded = jsEncoder.JavaScriptStringEncode(unchangedInput);

    // Assert - the very same string object must come back, not merely an equal copy
    Assert.Same(unchangedInput, encoded);
}
|
||||
|
||||
[Fact]
public void JavaScriptStringEncode_NullInput_ReturnsNull()
{
    // Arrange
    var jsEncoder = new JavaScriptStringEncoder();

    // Act
    string encoded = jsEncoder.JavaScriptStringEncode(null);

    // Assert - null passes straight through
    Assert.Null(encoded);
}
|
||||
|
||||
[Fact]
public void JavaScriptStringEncode_WithCharsRequiringEncodingAtBeginning()
{
    // Arrange
    var jsEncoder = new JavaScriptStringEncoder();

    // Act
    string encoded = jsEncoder.JavaScriptStringEncode("&Hello, there!");

    // Assert - only the leading ampersand is escaped
    Assert.Equal(@"\u0026Hello, there!", encoded);
}
|
||||
|
||||
[Fact]
public void JavaScriptStringEncode_WithCharsRequiringEncodingAtEnd()
{
    // Arrange
    var jsEncoder = new JavaScriptStringEncoder();

    // Act
    string encoded = jsEncoder.JavaScriptStringEncode("Hello, there!&");

    // Assert - only the trailing ampersand is escaped
    Assert.Equal(@"Hello, there!\u0026", encoded);
}
|
||||
|
||||
[Fact]
public void JavaScriptStringEncode_WithCharsRequiringEncodingInMiddle()
{
    // Arrange
    var jsEncoder = new JavaScriptStringEncoder();

    // Act
    string encoded = jsEncoder.JavaScriptStringEncode("Hello, &there!");

    // Assert - only the embedded ampersand is escaped
    Assert.Equal(@"Hello, \u0026there!", encoded);
}
|
||||
|
||||
[Fact]
public void JavaScriptStringEncode_WithCharsRequiringEncodingInterspersed()
{
    // Arrange
    var jsEncoder = new JavaScriptStringEncoder();

    // Act
    string encoded = jsEncoder.JavaScriptStringEncode("Hello, <there>!");

    // Assert - both angle brackets are escaped, the text between them is not
    Assert.Equal(@"Hello, \u003Cthere\u003E!", encoded);
}
|
||||
|
||||
[Fact]
public void JavaScriptStringEncode_CharArray()
{
    // Arrange
    var jsEncoder = new JavaScriptStringEncoder();
    var sink = new StringWriter();
    char[] source = "Hello+world!".ToCharArray();

    // Act - the expected output below corresponds to the slice "lo+wo"
    jsEncoder.JavaScriptStringEncode(source, 3, 5, sink);

    // Assert
    string written = sink.ToString();
    Assert.Equal(@"lo\u002Bwo", written);
}
|
||||
|
||||
[Fact]
public void JavaScriptStringEncode_StringSubstring()
{
    // Arrange
    var jsEncoder = new JavaScriptStringEncoder();
    var sink = new StringWriter();

    // Act - the expected output below corresponds to the slice "lo+wo"
    jsEncoder.JavaScriptStringEncode("Hello+world!", 3, 5, sink);

    // Assert
    string written = sink.ToString();
    Assert.Equal(@"lo\u002Bwo", written);
}
|
||||
|
||||
[Theory]
[InlineData("\"", @"\u0022")]
[InlineData("'", @"\u0027")]
public void JavaScriptStringEncode_Quotes(string input, string expected)
{
    // The design document calls for defense-in-depth against breaking out
    // of HTML attributes: the encoders must never emit a raw ' or ".
    // Quotes are therefore expected as \u escapes rather than \' and \".

    // Arrange
    var permissiveEncoder = new JavaScriptStringEncoder(UnicodeBlocks.All);

    // Act & assert
    Assert.Equal(expected, permissiveEncoder.JavaScriptStringEncode(input));
}
|
||||
|
||||
[Fact]
public void JavaScriptStringEncode_DoesNotOutputHtmlSensitiveCharacters()
{
    // The design document calls for defense-in-depth: HTML-sensitive
    // characters must never appear unescaped in the JavaScript encoder's output.

    // Arrange
    var jsEncoder = new JavaScriptStringEncoder(UnicodeBlocks.All);
    var htmlEncoder = new HtmlEncoder(UnicodeBlocks.All);

    // Act & assert - surrogate code points are skipped; they don't matter here
    for (int codePoint = 0; codePoint <= 0x10FFFF; codePoint++)
    {
        if (!IsSurrogateCodePoint(codePoint))
        {
            string jsEncoded = jsEncoder.JavaScriptStringEncode(Char.ConvertFromUtf32(codePoint));
            string jsThenHtmlEncoded = htmlEncoder.HtmlEncode(jsEncoded);

            // If HTML-encoding changes nothing, the JS output held no HTML-sensitive characters
            Assert.Equal(jsEncoded, jsThenHtmlEncoded);
        }
    }
}
|
||||
|
||||
// Reports whether the code point falls within U+D800..U+DFFF, the UTF-16 surrogate range.
private static bool IsSurrogateCodePoint(int codePoint)
{
    bool atOrAboveFirst = codePoint >= 0xD800;
    bool atOrBelowLast = codePoint <= 0xDFFF;
    return atOrAboveFirst && atOrBelowLast;
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,17 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<!-- MSBuild project file. VisualStudioVersion falls back to 14.0 and VSToolsPath is derived from MSBuildExtensionsPath32 when they are not already set; the AspNet props/targets files are imported only when VSToolsPath is non-empty; intermediate and output folders default to locations under ..\..\artifacts. -->
|
||||
<PropertyGroup>
|
||||
<VisualStudioVersion Condition="'$(VisualStudioVersion)' == ''">14.0</VisualStudioVersion>
|
||||
<VSToolsPath Condition="'$(VSToolsPath)' == ''">$(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion)</VSToolsPath>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VSToolsPath)\AspNet\Microsoft.Web.AspNet.Props" Condition="'$(VSToolsPath)' != ''" />
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>7ae2731d-43cd-4cf8-850a-4914de2ce930</ProjectGuid>
|
||||
<BaseIntermediateOutputPath Condition="'$(BaseIntermediateOutputPath)'=='' ">..\..\artifacts\obj\$(MSBuildProjectName)</BaseIntermediateOutputPath>
|
||||
<OutputPath Condition="'$(OutputPath)'=='' ">..\..\artifacts\bin\$(MSBuildProjectName)\</OutputPath>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup>
|
||||
<SchemaVersion>2.0</SchemaVersion>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VSToolsPath)\AspNet\Microsoft.Web.AspNet.targets" Condition="'$(VSToolsPath)' != ''" />
|
||||
</Project>
|
||||
|
|
@ -0,0 +1,17 @@
|
|||
// Copyright (c) Microsoft Open Technologies, Inc. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System;
|
||||
using Newtonsoft.Json;
|
||||
|
||||
namespace Microsoft.Framework.WebEncoders
|
||||
{
|
||||
// Plain data holder whose two properties are bound to JSON keys via Json.NET's
// JsonProperty attribute.
// NOTE(review): presumably one record of an HTML entity table - confirm against the code that loads it.
internal sealed class ParsedEntity
{
    // Bound to the "codepoints" JSON key.
    [JsonProperty("codepoints")]
    public int[] Codepoints { get; set; }

    // Bound to the "characters" JSON key.
    [JsonProperty("characters")]
    public string DecodedString { get; set; }
}
|
||||
}
|
||||
|
|
@ -0,0 +1,86 @@
|
|||
// Copyright (c) Microsoft Open Technologies, Inc. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System;
|
||||
using System.Globalization;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using Xunit;
|
||||
|
||||
namespace Microsoft.Framework.WebEncoders
|
||||
{
|
||||
public class UnicodeBlockTests
|
||||
{
|
||||
[Theory]
[InlineData(-1, 16)]
[InlineData(1, 16)]
[InlineData(0x10000, 16)]
public void Ctor_FailureCase_FirstCodePoint(int firstCodePoint, int blockSize)
{
    // Act - constructing the block must be rejected
    ArgumentOutOfRangeException thrown = Assert.Throws<ArgumentOutOfRangeException>(
        () => new UnicodeBlock(firstCodePoint, blockSize));

    // Assert - the exception must blame the first-code-point argument
    Assert.Equal("firstCodePoint", thrown.ParamName);
}
|
||||
|
||||
[Theory]
[InlineData(0x0100, -1)]
[InlineData(0x0100, 15)]
[InlineData(0x0100, 0x10000)]
public void Ctor_FailureCase_BlockSize(int firstCodePoint, int blockSize)
{
    // Act - constructing the block must be rejected
    ArgumentOutOfRangeException thrown = Assert.Throws<ArgumentOutOfRangeException>(
        () => new UnicodeBlock(firstCodePoint, blockSize));

    // Assert - the exception must blame the block-size argument
    Assert.Equal("blockSize", thrown.ParamName);
}
|
||||
|
||||
[Fact]
public void Ctor_SuccessCase()
{
    // Act - Latin Extended-A
    UnicodeBlock latinExtendedA = new UnicodeBlock(0x0100, 128);

    // Assert - both constructor arguments are exposed unchanged
    Assert.Equal(0x0100, latinExtendedA.FirstCodePoint);
    Assert.Equal(128, latinExtendedA.BlockSize);
}
|
||||
|
||||
[Theory]
[InlineData('\u0001', '\u0002')]
public void FromCharacterRange_FailureCases_FirstChar(char firstChar, char lastChar)
{
    // Act - the factory must reject this range
    ArgumentOutOfRangeException thrown = Assert.Throws<ArgumentOutOfRangeException>(
        () => UnicodeBlock.FromCharacterRange(firstChar, lastChar));

    // Assert - the exception must blame the first-char argument
    Assert.Equal("firstChar", thrown.ParamName);
}
|
||||
|
||||
[Theory]
[InlineData('\u0100', '\u007F')]
[InlineData('\u0100', '\u0100')]
[InlineData('\u0100', '\u010E')]
public void FromCharacterRange_FailureCases_LastChar(char firstChar, char lastChar)
{
    // Act - the factory must reject this range
    ArgumentOutOfRangeException thrown = Assert.Throws<ArgumentOutOfRangeException>(
        () => UnicodeBlock.FromCharacterRange(firstChar, lastChar));

    // Assert - the exception must blame the last-char argument
    Assert.Equal("lastChar", thrown.ParamName);
}
|
||||
|
||||
[Fact]
public void FromCharacterRange_SuccessCase()
{
    // Act - Latin Extended-B
    UnicodeBlock latinExtendedB = UnicodeBlock.FromCharacterRange('\u0180', '\u024F');

    // Assert - U+0180..U+024F inclusive is 208 code points
    Assert.Equal(0x0180, latinExtendedB.FirstCodePoint);
    Assert.Equal(208, latinExtendedB.BlockSize);
}
|
||||
|
||||
[Fact]
public void FromCharacterRange_SuccessCase_All()
{
    // Act - the widest range a pair of chars can describe
    UnicodeBlock everything = UnicodeBlock.FromCharacterRange('\u0000', '\uFFFF');

    // Assert - U+0000..U+FFFF inclusive is 0x10000 code points
    Assert.Equal(0, everything.FirstCodePoint);
    Assert.Equal(0x10000, everything.BlockSize);
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,210 @@
|
|||
// Copyright (c) Microsoft Open Technologies, Inc. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System;
|
||||
using System.Reflection;
|
||||
using Xunit;
|
||||
|
||||
namespace Microsoft.Framework.WebEncoders
|
||||
{
|
||||
public class UnicodeBlocksTests
|
||||
{
|
||||
[Fact]
public void Block_None()
{
    UnicodeBlock firstRead = UnicodeBlocks.None;
    Assert.NotNull(firstRead);

    // Check 1: the "none" block covers nothing
    Assert.Equal(0, firstRead.FirstCodePoint);
    Assert.Equal(0, firstRead.BlockSize);

    // Check 2: a second read of the property must hand back the same instance
    UnicodeBlock secondRead = UnicodeBlocks.None;
    Assert.Same(firstRead, secondRead);
}
|
||||
|
||||
[Fact]
public void Block_All()
{
    // Reuse the parametrized check with the full U+0000..U+FFFF range.
    Block_Unicode(first: '\u0000', last: '\uFFFF', blockName: nameof(UnicodeBlocks.All));
}
|
||||
|
||||
[Theory]
|
||||
[InlineData('\u0000', '\u007F', nameof(UnicodeBlocks.BasicLatin))]
|
||||
[InlineData('\u0080', '\u00FF', nameof(UnicodeBlocks.Latin1Supplement))]
|
||||
[InlineData('\u0100', '\u017F', nameof(UnicodeBlocks.LatinExtendedA))]
|
||||
[InlineData('\u0180', '\u024F', nameof(UnicodeBlocks.LatinExtendedB))]
|
||||
[InlineData('\u0250', '\u02AF', nameof(UnicodeBlocks.IPAExtensions))]
|
||||
[InlineData('\u02B0', '\u02FF', nameof(UnicodeBlocks.SpacingModifierLetters))]
|
||||
[InlineData('\u0300', '\u036F', nameof(UnicodeBlocks.CombiningDiacriticalMarks))]
|
||||
[InlineData('\u0370', '\u03FF', nameof(UnicodeBlocks.GreekandCoptic))]
|
||||
[InlineData('\u0400', '\u04FF', nameof(UnicodeBlocks.Cyrillic))]
|
||||
[InlineData('\u0500', '\u052F', nameof(UnicodeBlocks.CyrillicSupplement))]
|
||||
[InlineData('\u0530', '\u058F', nameof(UnicodeBlocks.Armenian))]
|
||||
[InlineData('\u0590', '\u05FF', nameof(UnicodeBlocks.Hebrew))]
|
||||
[InlineData('\u0600', '\u06FF', nameof(UnicodeBlocks.Arabic))]
|
||||
[InlineData('\u0700', '\u074F', nameof(UnicodeBlocks.Syriac))]
|
||||
[InlineData('\u0750', '\u077F', nameof(UnicodeBlocks.ArabicSupplement))]
|
||||
[InlineData('\u0780', '\u07BF', nameof(UnicodeBlocks.Thaana))]
|
||||
[InlineData('\u07C0', '\u07FF', nameof(UnicodeBlocks.NKo))]
|
||||
[InlineData('\u0800', '\u083F', nameof(UnicodeBlocks.Samaritan))]
|
||||
[InlineData('\u0840', '\u085F', nameof(UnicodeBlocks.Mandaic))]
|
||||
[InlineData('\u08A0', '\u08FF', nameof(UnicodeBlocks.ArabicExtendedA))]
|
||||
[InlineData('\u0900', '\u097F', nameof(UnicodeBlocks.Devanagari))]
|
||||
[InlineData('\u0980', '\u09FF', nameof(UnicodeBlocks.Bengali))]
|
||||
[InlineData('\u0A00', '\u0A7F', nameof(UnicodeBlocks.Gurmukhi))]
|
||||
[InlineData('\u0A80', '\u0AFF', nameof(UnicodeBlocks.Gujarati))]
|
||||
[InlineData('\u0B00', '\u0B7F', nameof(UnicodeBlocks.Oriya))]
|
||||
[InlineData('\u0B80', '\u0BFF', nameof(UnicodeBlocks.Tamil))]
|
||||
[InlineData('\u0C00', '\u0C7F', nameof(UnicodeBlocks.Telugu))]
|
||||
[InlineData('\u0C80', '\u0CFF', nameof(UnicodeBlocks.Kannada))]
|
||||
[InlineData('\u0D00', '\u0D7F', nameof(UnicodeBlocks.Malayalam))]
|
||||
[InlineData('\u0D80', '\u0DFF', nameof(UnicodeBlocks.Sinhala))]
|
||||
[InlineData('\u0E00', '\u0E7F', nameof(UnicodeBlocks.Thai))]
|
||||
[InlineData('\u0E80', '\u0EFF', nameof(UnicodeBlocks.Lao))]
|
||||
[InlineData('\u0F00', '\u0FFF', nameof(UnicodeBlocks.Tibetan))]
|
||||
[InlineData('\u1000', '\u109F', nameof(UnicodeBlocks.Myanmar))]
|
||||
[InlineData('\u10A0', '\u10FF', nameof(UnicodeBlocks.Georgian))]
|
||||
[InlineData('\u1100', '\u11FF', nameof(UnicodeBlocks.HangulJamo))]
|
||||
[InlineData('\u1200', '\u137F', nameof(UnicodeBlocks.Ethiopic))]
|
||||
[InlineData('\u1380', '\u139F', nameof(UnicodeBlocks.EthiopicSupplement))]
|
||||
[InlineData('\u13A0', '\u13FF', nameof(UnicodeBlocks.Cherokee))]
|
||||
[InlineData('\u1400', '\u167F', nameof(UnicodeBlocks.UnifiedCanadianAboriginalSyllabics))]
|
||||
[InlineData('\u1680', '\u169F', nameof(UnicodeBlocks.Ogham))]
|
||||
[InlineData('\u16A0', '\u16FF', nameof(UnicodeBlocks.Runic))]
|
||||
[InlineData('\u1700', '\u171F', nameof(UnicodeBlocks.Tagalog))]
|
||||
[InlineData('\u1720', '\u173F', nameof(UnicodeBlocks.Hanunoo))]
|
||||
[InlineData('\u1740', '\u175F', nameof(UnicodeBlocks.Buhid))]
|
||||
[InlineData('\u1760', '\u177F', nameof(UnicodeBlocks.Tagbanwa))]
|
||||
[InlineData('\u1780', '\u17FF', nameof(UnicodeBlocks.Khmer))]
|
||||
[InlineData('\u1800', '\u18AF', nameof(UnicodeBlocks.Mongolian))]
|
||||
[InlineData('\u18B0', '\u18FF', nameof(UnicodeBlocks.UnifiedCanadianAboriginalSyllabicsExtended))]
|
||||
[InlineData('\u1900', '\u194F', nameof(UnicodeBlocks.Limbu))]
|
||||
[InlineData('\u1950', '\u197F', nameof(UnicodeBlocks.TaiLe))]
|
||||
[InlineData('\u1980', '\u19DF', nameof(UnicodeBlocks.NewTaiLue))]
|
||||
[InlineData('\u19E0', '\u19FF', nameof(UnicodeBlocks.KhmerSymbols))]
|
||||
[InlineData('\u1A00', '\u1A1F', nameof(UnicodeBlocks.Buginese))]
|
||||
[InlineData('\u1A20', '\u1AAF', nameof(UnicodeBlocks.TaiTham))]
|
||||
[InlineData('\u1AB0', '\u1AFF', nameof(UnicodeBlocks.CombiningDiacriticalMarksExtended))]
|
||||
[InlineData('\u1B00', '\u1B7F', nameof(UnicodeBlocks.Balinese))]
|
||||
[InlineData('\u1B80', '\u1BBF', nameof(UnicodeBlocks.Sundanese))]
|
||||
[InlineData('\u1BC0', '\u1BFF', nameof(UnicodeBlocks.Batak))]
|
||||
[InlineData('\u1C00', '\u1C4F', nameof(UnicodeBlocks.Lepcha))]
|
||||
[InlineData('\u1C50', '\u1C7F', nameof(UnicodeBlocks.OlChiki))]
|
||||
[InlineData('\u1CC0', '\u1CCF', nameof(UnicodeBlocks.SundaneseSupplement))]
|
||||
[InlineData('\u1CD0', '\u1CFF', nameof(UnicodeBlocks.VedicExtensions))]
|
||||
[InlineData('\u1D00', '\u1D7F', nameof(UnicodeBlocks.PhoneticExtensions))]
|
||||
[InlineData('\u1D80', '\u1DBF', nameof(UnicodeBlocks.PhoneticExtensionsSupplement))]
|
||||
[InlineData('\u1DC0', '\u1DFF', nameof(UnicodeBlocks.CombiningDiacriticalMarksSupplement))]
|
||||
[InlineData('\u1E00', '\u1EFF', nameof(UnicodeBlocks.LatinExtendedAdditional))]
|
||||
[InlineData('\u1F00', '\u1FFF', nameof(UnicodeBlocks.GreekExtended))]
|
||||
[InlineData('\u2000', '\u206F', nameof(UnicodeBlocks.GeneralPunctuation))]
|
||||
[InlineData('\u2070', '\u209F', nameof(UnicodeBlocks.SuperscriptsandSubscripts))]
|
||||
[InlineData('\u20A0', '\u20CF', nameof(UnicodeBlocks.CurrencySymbols))]
|
||||
[InlineData('\u20D0', '\u20FF', nameof(UnicodeBlocks.CombiningDiacriticalMarksforSymbols))]
|
||||
[InlineData('\u2100', '\u214F', nameof(UnicodeBlocks.LetterlikeSymbols))]
|
||||
[InlineData('\u2150', '\u218F', nameof(UnicodeBlocks.NumberForms))]
|
||||
[InlineData('\u2190', '\u21FF', nameof(UnicodeBlocks.Arrows))]
|
||||
[InlineData('\u2200', '\u22FF', nameof(UnicodeBlocks.MathematicalOperators))]
|
||||
[InlineData('\u2300', '\u23FF', nameof(UnicodeBlocks.MiscellaneousTechnical))]
|
||||
[InlineData('\u2400', '\u243F', nameof(UnicodeBlocks.ControlPictures))]
|
||||
[InlineData('\u2440', '\u245F', nameof(UnicodeBlocks.OpticalCharacterRecognition))]
|
||||
[InlineData('\u2460', '\u24FF', nameof(UnicodeBlocks.EnclosedAlphanumerics))]
|
||||
[InlineData('\u2500', '\u257F', nameof(UnicodeBlocks.BoxDrawing))]
|
||||
[InlineData('\u2580', '\u259F', nameof(UnicodeBlocks.BlockElements))]
|
||||
[InlineData('\u25A0', '\u25FF', nameof(UnicodeBlocks.GeometricShapes))]
|
||||
[InlineData('\u2600', '\u26FF', nameof(UnicodeBlocks.MiscellaneousSymbols))]
|
||||
[InlineData('\u2700', '\u27BF', nameof(UnicodeBlocks.Dingbats))]
|
||||
[InlineData('\u27C0', '\u27EF', nameof(UnicodeBlocks.MiscellaneousMathematicalSymbolsA))]
|
||||
[InlineData('\u27F0', '\u27FF', nameof(UnicodeBlocks.SupplementalArrowsA))]
|
||||
[InlineData('\u2800', '\u28FF', nameof(UnicodeBlocks.BraillePatterns))]
|
||||
[InlineData('\u2900', '\u297F', nameof(UnicodeBlocks.SupplementalArrowsB))]
|
||||
[InlineData('\u2980', '\u29FF', nameof(UnicodeBlocks.MiscellaneousMathematicalSymbolsB))]
|
||||
[InlineData('\u2A00', '\u2AFF', nameof(UnicodeBlocks.SupplementalMathematicalOperators))]
|
||||
[InlineData('\u2B00', '\u2BFF', nameof(UnicodeBlocks.MiscellaneousSymbolsandArrows))]
|
||||
[InlineData('\u2C00', '\u2C5F', nameof(UnicodeBlocks.Glagolitic))]
|
||||
[InlineData('\u2C60', '\u2C7F', nameof(UnicodeBlocks.LatinExtendedC))]
|
||||
[InlineData('\u2C80', '\u2CFF', nameof(UnicodeBlocks.Coptic))]
|
||||
[InlineData('\u2D00', '\u2D2F', nameof(UnicodeBlocks.GeorgianSupplement))]
|
||||
[InlineData('\u2D30', '\u2D7F', nameof(UnicodeBlocks.Tifinagh))]
|
||||
[InlineData('\u2D80', '\u2DDF', nameof(UnicodeBlocks.EthiopicExtended))]
|
||||
[InlineData('\u2DE0', '\u2DFF', nameof(UnicodeBlocks.CyrillicExtendedA))]
|
||||
[InlineData('\u2E00', '\u2E7F', nameof(UnicodeBlocks.SupplementalPunctuation))]
|
||||
[InlineData('\u2E80', '\u2EFF', nameof(UnicodeBlocks.CJKRadicalsSupplement))]
|
||||
[InlineData('\u2F00', '\u2FDF', nameof(UnicodeBlocks.KangxiRadicals))]
|
||||
[InlineData('\u2FF0', '\u2FFF', nameof(UnicodeBlocks.IdeographicDescriptionCharacters))]
|
||||
[InlineData('\u3000', '\u303F', nameof(UnicodeBlocks.CJKSymbolsandPunctuation))]
|
||||
[InlineData('\u3040', '\u309F', nameof(UnicodeBlocks.Hiragana))]
|
||||
[InlineData('\u30A0', '\u30FF', nameof(UnicodeBlocks.Katakana))]
|
||||
[InlineData('\u3100', '\u312F', nameof(UnicodeBlocks.Bopomofo))]
|
||||
[InlineData('\u3130', '\u318F', nameof(UnicodeBlocks.HangulCompatibilityJamo))]
|
||||
[InlineData('\u3190', '\u319F', nameof(UnicodeBlocks.Kanbun))]
|
||||
[InlineData('\u31A0', '\u31BF', nameof(UnicodeBlocks.BopomofoExtended))]
|
||||
[InlineData('\u31C0', '\u31EF', nameof(UnicodeBlocks.CJKStrokes))]
|
||||
[InlineData('\u31F0', '\u31FF', nameof(UnicodeBlocks.KatakanaPhoneticExtensions))]
|
||||
[InlineData('\u3200', '\u32FF', nameof(UnicodeBlocks.EnclosedCJKLettersandMonths))]
|
||||
[InlineData('\u3300', '\u33FF', nameof(UnicodeBlocks.CJKCompatibility))]
|
||||
[InlineData('\u3400', '\u4DBF', nameof(UnicodeBlocks.CJKUnifiedIdeographsExtensionA))]
|
||||
[InlineData('\u4DC0', '\u4DFF', nameof(UnicodeBlocks.YijingHexagramSymbols))]
|
||||
[InlineData('\u4E00', '\u9FFF', nameof(UnicodeBlocks.CJKUnifiedIdeographs))]
|
||||
[InlineData('\uA000', '\uA48F', nameof(UnicodeBlocks.YiSyllables))]
|
||||
[InlineData('\uA490', '\uA4CF', nameof(UnicodeBlocks.YiRadicals))]
|
||||
[InlineData('\uA4D0', '\uA4FF', nameof(UnicodeBlocks.Lisu))]
|
||||
[InlineData('\uA500', '\uA63F', nameof(UnicodeBlocks.Vai))]
|
||||
[InlineData('\uA640', '\uA69F', nameof(UnicodeBlocks.CyrillicExtendedB))]
|
||||
[InlineData('\uA6A0', '\uA6FF', nameof(UnicodeBlocks.Bamum))]
|
||||
[InlineData('\uA700', '\uA71F', nameof(UnicodeBlocks.ModifierToneLetters))]
|
||||
[InlineData('\uA720', '\uA7FF', nameof(UnicodeBlocks.LatinExtendedD))]
|
||||
[InlineData('\uA800', '\uA82F', nameof(UnicodeBlocks.SylotiNagri))]
|
||||
[InlineData('\uA830', '\uA83F', nameof(UnicodeBlocks.CommonIndicNumberForms))]
|
||||
[InlineData('\uA840', '\uA87F', nameof(UnicodeBlocks.Phagspa))]
|
||||
[InlineData('\uA880', '\uA8DF', nameof(UnicodeBlocks.Saurashtra))]
|
||||
[InlineData('\uA8E0', '\uA8FF', nameof(UnicodeBlocks.DevanagariExtended))]
|
||||
[InlineData('\uA900', '\uA92F', nameof(UnicodeBlocks.KayahLi))]
|
||||
[InlineData('\uA930', '\uA95F', nameof(UnicodeBlocks.Rejang))]
|
||||
[InlineData('\uA960', '\uA97F', nameof(UnicodeBlocks.HangulJamoExtendedA))]
|
||||
[InlineData('\uA980', '\uA9DF', nameof(UnicodeBlocks.Javanese))]
|
||||
[InlineData('\uA9E0', '\uA9FF', nameof(UnicodeBlocks.MyanmarExtendedB))]
|
||||
[InlineData('\uAA00', '\uAA5F', nameof(UnicodeBlocks.Cham))]
|
||||
[InlineData('\uAA60', '\uAA7F', nameof(UnicodeBlocks.MyanmarExtendedA))]
|
||||
[InlineData('\uAA80', '\uAADF', nameof(UnicodeBlocks.TaiViet))]
|
||||
[InlineData('\uAAE0', '\uAAFF', nameof(UnicodeBlocks.MeeteiMayekExtensions))]
|
||||
[InlineData('\uAB00', '\uAB2F', nameof(UnicodeBlocks.EthiopicExtendedA))]
|
||||
[InlineData('\uAB30', '\uAB6F', nameof(UnicodeBlocks.LatinExtendedE))]
|
||||
[InlineData('\uABC0', '\uABFF', nameof(UnicodeBlocks.MeeteiMayek))]
|
||||
[InlineData('\uAC00', '\uD7AF', nameof(UnicodeBlocks.HangulSyllables))]
|
||||
[InlineData('\uD7B0', '\uD7FF', nameof(UnicodeBlocks.HangulJamoExtendedB))]
|
||||
[InlineData('\uF900', '\uFAFF', nameof(UnicodeBlocks.CJKCompatibilityIdeographs))]
|
||||
[InlineData('\uFB00', '\uFB4F', nameof(UnicodeBlocks.AlphabeticPresentationForms))]
|
||||
[InlineData('\uFB50', '\uFDFF', nameof(UnicodeBlocks.ArabicPresentationFormsA))]
|
||||
[InlineData('\uFE00', '\uFE0F', nameof(UnicodeBlocks.VariationSelectors))]
|
||||
[InlineData('\uFE10', '\uFE1F', nameof(UnicodeBlocks.VerticalForms))]
|
||||
[InlineData('\uFE20', '\uFE2F', nameof(UnicodeBlocks.CombiningHalfMarks))]
|
||||
[InlineData('\uFE30', '\uFE4F', nameof(UnicodeBlocks.CJKCompatibilityForms))]
|
||||
[InlineData('\uFE50', '\uFE6F', nameof(UnicodeBlocks.SmallFormVariants))]
|
||||
[InlineData('\uFE70', '\uFEFF', nameof(UnicodeBlocks.ArabicPresentationFormsB))]
|
||||
[InlineData('\uFF00', '\uFFEF', nameof(UnicodeBlocks.HalfwidthandFullwidthForms))]
|
||||
[InlineData('\uFFF0', '\uFFFF', nameof(UnicodeBlocks.Specials))]
|
||||
public void Block_Unicode(char first, char last, string blockName)
{
    // Sanity-check the test data itself before touching the property under test.
    Assert.Equal(0x0, first & 0xF); // a block starts at U+nnn0
    Assert.Equal(0xF, last & 0xF); // a block ends at U+nnnF
    Assert.True(first < last); // the range must be in ascending order

    // Resolve the public static property on UnicodeBlocks by its name.
    PropertyInfo blockProperty = typeof(UnicodeBlocks).GetProperty(blockName, BindingFlags.Public | BindingFlags.Static);
    Assert.NotNull(blockProperty);

    var firstRead = (UnicodeBlock)blockProperty.GetValue(null);
    Assert.NotNull(firstRead);

    // Check 1: the block must cover exactly first..last
    var lastCodePointInBlock = firstRead.FirstCodePoint + firstRead.BlockSize - 1;
    Assert.Equal(first, firstRead.FirstCodePoint);
    Assert.Equal(last, lastCodePointInBlock);

    // Check 2: a second read of the property must hand back the same instance
    var secondRead = (UnicodeBlock)blockProperty.GetValue(null);
    Assert.Same(firstRead, secondRead);
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,406 @@
|
|||
// Copyright (c) Microsoft Open Technologies, Inc. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Globalization;
|
||||
using System.IO;
|
||||
using Moq;
|
||||
using Xunit;
|
||||
|
||||
namespace Microsoft.Framework.WebEncoders
|
||||
{
|
||||
public class UnicodeEncoderBaseTests
|
||||
{
|
||||
[Fact]
public void Ctor_WithCustomFilters()
{
    // Arrange - start from an empty allow-list, then explicitly allow a few
    // characters, including some that the encoder is expected to reject anyway
    var allowList = new CodePointFilter(UnicodeBlocks.None).AllowChars("ab").AllowChars('\0', '&', '\uFFFF', 'd');
    UnicodeEncoderBase customEncoder = new CustomUnicodeEncoderBase(allowList);

    // Act & assert - explicitly allowed characters pass through untouched
    Assert.Equal("a", customEncoder.Encode("a"));
    Assert.Equal("b", customEncoder.Encode("b"));

    // 'c' was never allowed, so it is encoded
    Assert.Equal("[U+0063]", customEncoder.Encode("c"));
    Assert.Equal("d", customEncoder.Encode("d"));

    // The filter cannot override the always-encode rules:
    Assert.Equal("[U+0000]", customEncoder.Encode("\0")); // control chars
    Assert.Equal("[U+0026]", customEncoder.Encode("&")); // HTML-special chars
    Assert.Equal("[U+FFFF]", customEncoder.Encode("\uFFFF")); // non-chars and other forbidden chars
}
|
||||
|
||||
[Fact]
public void Ctor_WithUnicodeBlocks()
{
    // Arrange: only Latin-1 Supplement and Miscellaneous Symbols are allowed
    var allowedBlocks = new CodePointFilter(UnicodeBlocks.Latin1Supplement, UnicodeBlocks.MiscellaneousSymbols);
    UnicodeEncoderBase sut = new CustomUnicodeEncoderBase(allowedBlocks);

    // Act
    string basicLatin = sut.Encode("a");
    string latin1 = sut.Encode("\u00E9" /* LATIN SMALL LETTER E WITH ACUTE */);
    string miscSymbol = sut.Encode("\u2601" /* CLOUD */);

    // Assert: 'a' is outside the allowed blocks, the other two are inside
    Assert.Equal("[U+0061]", basicLatin);
    Assert.Equal("\u00E9", latin1);
    Assert.Equal("\u2601", miscSymbol);
}
|
||||
|
||||
[Fact]
public void Encode_AllRangesAllowed_StillEncodesForbiddenChars_Simple()
{
    // Arrange
    const string input = "Hello <>&\'\"+ there!";
    const string expected = "Hello [U+003C][U+003E][U+0026][U+0027][U+0022][U+002B] there!";
    UnicodeEncoderBase sut = new CustomUnicodeEncoderBase(UnicodeBlocks.All);

    // Act
    string actual = sut.Encode(input);

    // Assert: every block is allowed, yet the six HTML-sensitive characters are still escaped
    Assert.Equal(expected, actual);
}
|
||||
|
||||
[Fact]
public void Encode_AllRangesAllowed_StillEncodesForbiddenChars_Extended()
{
    // Arrange
    UnicodeEncoderBase sut = new CustomUnicodeEncoderBase(UnicodeBlocks.All);

    // Act & assert - BMP chars, one UTF-16 code unit at a time
    for (int codePoint = 0; codePoint <= 0xFFFF; codePoint++)
    {
        string input = new String((char)codePoint, 1);
        string expected;

        if (IsSurrogateCodePoint(codePoint))
        {
            // A lone surrogate can never be valid, so U+FFFD is expected in its place.
            expected = "\uFFFD";
        }
        else
        {
            bool isHtmlSensitive = codePoint == '<' || codePoint == '>' || codePoint == '&'
                || codePoint == '\"' || codePoint == '\'' || codePoint == '+';
            bool isControl = codePoint <= 0x001F || (0x007F <= codePoint && codePoint <= 0x9F);

            // Control chars, undefined (or otherwise disallowed) chars and
            // HTML-sensitive chars must all come out escaped.
            bool mustEncode = (isControl || !UnicodeHelpers.IsCharacterDefined((char)codePoint)) || isHtmlSensitive;

            expected = mustEncode
                ? String.Format(CultureInfo.InvariantCulture, "[U+{0:X4}]", codePoint)
                : input; // no encoding
        }

        Assert.Equal(expected, sut.Encode(input));
    }

    // Act & assert - astral chars: every supplementary code point is expected to be escaped
    for (int codePoint = 0x10000; codePoint <= 0x10FFFF; codePoint++)
    {
        string surrogatePair = Char.ConvertFromUtf32(codePoint);
        string expected = String.Format(CultureInfo.InvariantCulture, "[U+{0:X}]", codePoint);

        Assert.Equal(expected, sut.Encode(surrogatePair));
    }
}
|
||||
|
||||
[Fact]
public void Encode_BadSurrogates_ReturnsUnicodeReplacementChar()
{
    // Arrange
    // Input layout:
    //   a <unpaired leading> b <unpaired trailing> c <trailing before leading>
    //   d <unpaired trailing> <valid pair> e <leading surrogate at end of string>
    const string input = "a\uD800b\uDFFFc\uDFFF\uD800d\uDFFF\uD800\uDFFFe\uD800";
    const string expected = "a\uFFFDb\uFFFDc\uFFFD\uFFFDd\uFFFD[U+103FF]e\uFFFD";
    UnicodeEncoderBase sut = new CustomUnicodeEncoderBase(UnicodeBlocks.All); // allow all codepoints

    // Act
    string actual = sut.Encode(input);

    // Assert: each unpaired surrogate becomes U+FFFD; the one valid pair becomes a single escaped scalar
    Assert.Equal(expected, actual);
}
|
||||
|
||||
[Fact]
public void Encode_EmptyStringInput_ReturnsEmptyString()
{
    // Arrange
    UnicodeEncoderBase sut = new CustomUnicodeEncoderBase(UnicodeBlocks.All);

    // Act
    string actual = sut.Encode("");

    // Assert
    Assert.Equal("", actual);
}
|
||||
|
||||
[Fact]
public void Encode_InputDoesNotRequireEncoding_ReturnsOriginalStringInstance()
{
    // Arrange
    string input = "Hello, there!";
    UnicodeEncoderBase sut = new CustomUnicodeEncoderBase(UnicodeBlocks.All);

    // Act
    string actual = sut.Encode(input);

    // Assert: reference equality - nothing needed escaping, so no new string should be produced
    Assert.Same(input, actual);
}
|
||||
|
||||
[Fact]
public void Encode_NullInput_ReturnsNull()
{
    // Arrange
    UnicodeEncoderBase sut = new CustomUnicodeEncoderBase(UnicodeBlocks.All);

    // Act
    string actual = sut.Encode(null);

    // Assert
    Assert.Null(actual);
}
|
||||
|
||||
[Fact]
public void Encode_WithCharsRequiringEncodingAtBeginning()
{
    // Arrange
    var sut = new CustomUnicodeEncoderBase(UnicodeBlocks.All);

    // Act
    string actual = sut.Encode("&Hello, there!");

    // Assert: only the leading '&' is escaped
    Assert.Equal("[U+0026]Hello, there!", actual);
}
|
||||
|
||||
[Fact]
public void Encode_WithCharsRequiringEncodingAtEnd()
{
    // Arrange
    var sut = new CustomUnicodeEncoderBase(UnicodeBlocks.All);

    // Act
    string actual = sut.Encode("Hello, there!&");

    // Assert: only the trailing '&' is escaped
    Assert.Equal("Hello, there![U+0026]", actual);
}
|
||||
|
||||
[Fact]
public void Encode_WithCharsRequiringEncodingInMiddle()
{
    // Arrange
    var sut = new CustomUnicodeEncoderBase(UnicodeBlocks.All);

    // Act
    string actual = sut.Encode("Hello, &there!");

    // Assert: the '&' in the middle is escaped, text on both sides is untouched
    Assert.Equal("Hello, [U+0026]there!", actual);
}
|
||||
|
||||
[Fact]
public void Encode_WithCharsRequiringEncodingInterspersed()
{
    // Arrange
    var sut = new CustomUnicodeEncoderBase(UnicodeBlocks.All);

    // Act
    string actual = sut.Encode("Hello, <there>!");

    // Assert: both angle brackets are escaped, the text between them is untouched
    Assert.Equal("Hello, [U+003C]there[U+003E]!", actual);
}
|
||||
|
||||
[Fact]
public void Encode_CharArray_ParameterChecking_NegativeTestCases()
{
    // Arrange
    CustomUnicodeEncoderBase sut = new CustomUnicodeEncoderBase();

    // Act & assert

    // Null buffer / null writer
    Assert.Throws<ArgumentNullException>(
        () => sut.Encode((char[])null, 0, 0, new StringWriter()));
    Assert.Throws<ArgumentNullException>(
        () => sut.Encode("abc".ToCharArray(), 0, 3, null));

    // Start index / count combinations that do not describe a range inside the 3-char buffer
    Assert.Throws<ArgumentOutOfRangeException>(
        () => sut.Encode("abc".ToCharArray(), -1, 2, new StringWriter())); // negative start
    Assert.Throws<ArgumentOutOfRangeException>(
        () => sut.Encode("abc".ToCharArray(), 2, 2, new StringWriter())); // start + count past the end
    Assert.Throws<ArgumentOutOfRangeException>(
        () => sut.Encode("abc".ToCharArray(), 4, 0, new StringWriter())); // start past the end
    Assert.Throws<ArgumentOutOfRangeException>(
        () => sut.Encode("abc".ToCharArray(), 2, -1, new StringWriter())); // negative count
    Assert.Throws<ArgumentOutOfRangeException>(
        () => sut.Encode("abc".ToCharArray(), 1, 3, new StringWriter())); // start + count past the end
}
|
||||
|
||||
[Fact]
public void Encode_CharArray_ZeroCount_DoesNotCallIntoTextWriter()
{
    // Arrange: a strict mock with no setups, so any call made on the writer would throw
    var strictWriter = new Mock<TextWriter>(MockBehavior.Strict);
    TextWriter output = strictWriter.Object;
    CustomUnicodeEncoderBase sut = new CustomUnicodeEncoderBase();

    // Act
    sut.Encode("abc".ToCharArray(), 2, 0, output);

    // Assert
    // Reaching this point without the strict mock throwing is the success condition.
}
|
||||
|
||||
[Fact]
public void Encode_CharArray_AllCharsValid()
{
    // Arrange
    char[] buffer = "abc&xyz".ToCharArray();
    StringWriter sink = new StringWriter();
    CustomUnicodeEncoderBase sut = new CustomUnicodeEncoderBase(UnicodeBlocks.All);

    // Act: encode only the two-char window starting at index 4 ("xy")
    sut.Encode(buffer, 4, 2, sink);

    // Assert
    Assert.Equal("xy", sink.ToString());
}
|
||||
|
||||
[Fact]
public void Encode_CharArray_AllCharsInvalid()
{
    // Arrange: no block is allowed, so every character in the window must be escaped
    char[] buffer = "abc&xyz".ToCharArray();
    StringWriter sink = new StringWriter();
    CustomUnicodeEncoderBase sut = new CustomUnicodeEncoderBase(UnicodeBlocks.None);

    // Act: encode only the two-char window starting at index 4 ("xy")
    sut.Encode(buffer, 4, 2, sink);

    // Assert
    Assert.Equal("[U+0078][U+0079]", sink.ToString());
}
|
||||
|
||||
[Fact]
public void Encode_CharArray_SomeCharsValid()
{
    // Arrange
    char[] buffer = "abc&xyz".ToCharArray();
    StringWriter sink = new StringWriter();
    CustomUnicodeEncoderBase sut = new CustomUnicodeEncoderBase(UnicodeBlocks.All);

    // Act: encode the three-char window starting at index 2 ("c&x")
    sut.Encode(buffer, 2, 3, sink);

    // Assert: only the '&' inside the window is escaped
    Assert.Equal("c[U+0026]x", sink.ToString());
}
|
||||
|
||||
[Fact]
public void Encode_StringSubstring_ParameterChecking_NegativeTestCases()
{
    // Arrange
    CustomUnicodeEncoderBase sut = new CustomUnicodeEncoderBase();

    // Act & assert

    // Null input string / null writer
    Assert.Throws<ArgumentNullException>(
        () => sut.Encode((string)null, 0, 0, new StringWriter()));
    Assert.Throws<ArgumentNullException>(
        () => sut.Encode("abc", 0, 3, null));

    // Start index / count combinations that do not describe a range inside the 3-char string
    Assert.Throws<ArgumentOutOfRangeException>(
        () => sut.Encode("abc", -1, 2, new StringWriter())); // negative start
    Assert.Throws<ArgumentOutOfRangeException>(
        () => sut.Encode("abc", 2, 2, new StringWriter())); // start + count past the end
    Assert.Throws<ArgumentOutOfRangeException>(
        () => sut.Encode("abc", 4, 0, new StringWriter())); // start past the end
    Assert.Throws<ArgumentOutOfRangeException>(
        () => sut.Encode("abc", 2, -1, new StringWriter())); // negative count
    Assert.Throws<ArgumentOutOfRangeException>(
        () => sut.Encode("abc", 1, 3, new StringWriter())); // start + count past the end
}
|
||||
|
||||
[Fact]
public void Encode_StringSubstring_ZeroCount_DoesNotCallIntoTextWriter()
{
    // Arrange: a strict mock with no setups, so any call made on the writer would throw
    var strictWriter = new Mock<TextWriter>(MockBehavior.Strict);
    TextWriter output = strictWriter.Object;
    CustomUnicodeEncoderBase sut = new CustomUnicodeEncoderBase();

    // Act
    sut.Encode("abc", 2, 0, output);

    // Assert
    // Reaching this point without the strict mock throwing is the success condition.
}
|
||||
|
||||
[Fact]
public void Encode_StringSubstring_AllCharsValid()
{
    // Arrange
    StringWriter sink = new StringWriter();
    CustomUnicodeEncoderBase sut = new CustomUnicodeEncoderBase(UnicodeBlocks.All);

    // Act: encode only the two-char window starting at index 4 ("xy")
    sut.Encode("abc&xyz", 4, 2, sink);

    // Assert
    Assert.Equal("xy", sink.ToString());
}
|
||||
|
||||
[Fact]
public void Encode_StringSubstring_EntireString_AllCharsValid_ForwardDirectlyToOutput()
{
    // Arrange: the strict mock accepts exactly one call, Write("abc"); anything else throws
    var strictWriter = new Mock<TextWriter>(MockBehavior.Strict);
    strictWriter.Setup(w => w.Write("abc")).Verifiable();
    CustomUnicodeEncoderBase sut = new CustomUnicodeEncoderBase(UnicodeBlocks.All);

    // Act: the window covers the whole string and nothing in it needs escaping
    sut.Encode("abc", 0, 3, strictWriter.Object);

    // Assert: the expected Write call really happened
    strictWriter.Verify();
}
|
||||
|
||||
[Fact]
public void Encode_StringSubstring_AllCharsInvalid()
{
    // Arrange: no block is allowed, so every character in the window must be escaped
    StringWriter sink = new StringWriter();
    CustomUnicodeEncoderBase sut = new CustomUnicodeEncoderBase(UnicodeBlocks.None);

    // Act: encode only the two-char window starting at index 4 ("xy")
    sut.Encode("abc&xyz", 4, 2, sink);

    // Assert
    Assert.Equal("[U+0078][U+0079]", sink.ToString());
}
|
||||
|
||||
[Fact]
public void Encode_StringSubstring_SomeCharsValid()
{
    // Arrange
    StringWriter sink = new StringWriter();
    CustomUnicodeEncoderBase sut = new CustomUnicodeEncoderBase(UnicodeBlocks.All);

    // Act: encode the three-char window starting at index 2 ("c&x")
    sut.Encode("abc&xyz", 2, 3, sink);

    // Assert: only the '&' inside the window is escaped
    Assert.Equal("c[U+0026]x", sink.ToString());
}
|
||||
|
||||
[Fact]
public void Encode_StringSubstring_EntireString_SomeCharsValid()
{
    // Arrange
    const string input = "abc&xyz";
    StringWriter sink = new StringWriter();
    CustomUnicodeEncoderBase sut = new CustomUnicodeEncoderBase(UnicodeBlocks.All);

    // Act: the window covers the entire input
    sut.Encode(input, 0, input.Length, sink);

    // Assert: only the '&' is escaped
    Assert.Equal("abc[U+0026]xyz", sink.ToString());
}
|
||||
|
||||
// Returns true when codePoint lies in the UTF-16 surrogate range U+D800..U+DFFF.
private static bool IsSurrogateCodePoint(int codePoint)
{
    // The surrogate range is exactly the 0x800 values whose bits above the low 11 equal 0xD800.
    return (codePoint & ~0x7FF) == 0xD800;
}
|
||||
|
||||
// Minimal ICodePointFilter that reports a fixed, caller-supplied list of code points as allowed.
private sealed class CustomCodePointFilter : ICodePointFilter
{
    // Held by reference; the array passed to the constructor is not copied.
    private readonly int[] _codePoints;

    public CustomCodePointFilter(params int[] allowedCodePoints)
    {
        _codePoints = allowedCodePoints;
    }

    // Hands back the array that was given to the constructor.
    public IEnumerable<int> GetAllowedCodePoints() => _codePoints;
}
|
||||
|
||||
// Test-only encoder that renders every escaped scalar as "[U+XXXX]".
private sealed class CustomUnicodeEncoderBase : UnicodeEncoderBase
{
    // 'maxOutputCharsPerInputChar' is deliberately given a known-bad value of 1
    // (the escape sequence is much longer than that), which additionally checks
    // that the base class copes when the original estimate is wrong.
    public CustomUnicodeEncoderBase(CodePointFilter filter)
        : base(filter, maxOutputCharsPerInputChar: 1)
    {
    }

    // Convenience overload: wraps the given blocks in a CodePointFilter.
    public CustomUnicodeEncoderBase(params UnicodeBlock[] allowedBlocks)
        : this(new CodePointFilter(allowedBlocks))
    {
    }

    // Writes the scalar as upper-case hex, zero-padded to at least four digits.
    protected override void WriteEncodedScalar(ref Writer writer, uint value)
    {
        string hex = value.ToString("X4", CultureInfo.InvariantCulture);
        writer.Write("[U+" + hex + "]");
    }
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,184 @@
|
|||
// Copyright (c) Microsoft Open Technologies, Inc. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Globalization;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Reflection;
|
||||
using System.Text;
|
||||
using Xunit;
|
||||
|
||||
namespace Microsoft.Framework.WebEncoders
|
||||
{
|
||||
public unsafe class UnicodeHelpersTests
|
||||
{
|
||||
private const int UnicodeReplacementChar = '\uFFFD';
|
||||
|
||||
private static readonly UTF8Encoding _utf8EncodingThrowOnInvalidBytes = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false, throwOnInvalidBytes: true);
|
||||
|
||||
[Fact]
public void GetDefinedCharacterBitmap_ReturnsSingletonInstance()
{
    // Act: ask for the bitmap twice
    uint[] firstCall = UnicodeHelpers.GetDefinedCharacterBitmap();
    uint[] secondCall = UnicodeHelpers.GetDefinedCharacterBitmap();

    // Assert: both calls must yield the same array instance
    Assert.Same(firstCall, secondCall);
}
|
||||
|
||||
[Theory]
[InlineData(1, "a", (int)'a')] // normal BMP char, end of string
[InlineData(2, "ab", (int)'a')] // normal BMP char, not end of string
[InlineData(3, "\uDFFF", UnicodeReplacementChar)] // trailing surrogate, end of string
[InlineData(4, "\uDFFFx", UnicodeReplacementChar)] // trailing surrogate, not end of string
[InlineData(5, "\uD800", UnicodeReplacementChar)] // leading surrogate, end of string
[InlineData(6, "\uD800x", UnicodeReplacementChar)] // leading surrogate, not end of string, followed by non-surrogate
[InlineData(7, "\uD800\uD800", UnicodeReplacementChar)] // leading surrogate, not end of string, followed by leading surrogate
[InlineData(8, "\uD800\uDFFF", 0x103FF)] // leading surrogate, not end of string, followed by trailing surrogate
public void GetScalarValueFromUtf16(int unused, string input, int expectedResult)
{
    // 'unused' is just a per-row identifier: without it the xunit runner cannot
    // tell the individual malformed-data test cases apart.

    // A one-char input means the first char is also the last one.
    bool firstCharIsLast = (input.Length == 1);

    fixed (char* pInput = input)
    {
        var actual = UnicodeHelpers.GetScalarValueFromUtf16(pInput, endOfString: firstCharIsLast);
        Assert.Equal(expectedResult, actual);
    }
}
|
||||
|
||||
[Fact]
public void GetUtf8RepresentationForScalarValue()
{
    for (int scalar = 0; scalar <= 0x10FFFF; scalar++)
    {
        bool isSurrogate = scalar <= 0xFFFF && Char.IsSurrogate((char)scalar);
        if (isSurrogate)
        {
            continue; // no surrogates
        }

        // Arrange: the framework's strict UTF-8 encoder provides the known-good bytes
        string asUtf16 = Char.ConvertFromUtf32(scalar);
        byte[] expectedUtf8Bytes = _utf8EncodingThrowOnInvalidBytes.GetBytes(asUtf16);

        // Act: unpack the returned value one byte at a time, least-significant byte first,
        // always taking the first byte and stopping once the remaining bits are all zero
        uint packed = (uint)UnicodeHelpers.GetUtf8RepresentationForScalarValue((uint)scalar);
        List<byte> actualUtf8Bytes = new List<byte>(4) { (byte)packed };
        for (packed >>= 8; packed != 0; packed >>= 8)
        {
            actualUtf8Bytes.Add((byte)packed);
        }

        // Assert
        Assert.Equal(expectedUtf8Bytes, actualUtf8Bytes);
    }
}
|
||||
|
||||
[Fact]
public void IsCharacterDefined()
{
    // Arrange: expected answers, indexed by UTF-16 code unit
    bool[] definedChars = ReadListOfDefinedCharacters();
    List<string> mismatches = new List<string>();

    // Act: collect every disagreement rather than stopping at the first one
    for (int codeUnit = 0; codeUnit <= Char.MaxValue; codeUnit++)
    {
        bool expected = definedChars[codeUnit];
        bool actual = UnicodeHelpers.IsCharacterDefined((char)codeUnit);
        if (expected == actual)
        {
            continue;
        }

        mismatches.Add(String.Format(CultureInfo.InvariantCulture, "Character U+{0:X4}: expected = {1}, actual = {2}", codeUnit, expected, actual));
    }

    // Assert: on failure, report all mismatches at once, one per line
    Assert.True(mismatches.Count == 0, String.Join(Environment.NewLine, mismatches));
}
|
||||
|
||||
// Builds the expected "is this BMP character defined/allowed" table from the
// UnicodeData.txt file that is embedded in the test assembly.
//
// Returns an array of 0x10000 flags indexed by UTF-16 code unit. An entry is true
// when the character is U+0020 SPACE or when its general category (third field of
// its UnicodeData.txt row) is one of the allowed categories listed below.
//
// Also asserts that every allowed category was encountered at least once, which
// guards against a typo in the category list.
private static bool[] ReadListOfDefinedCharacters()
{
    HashSet<string> allowedCategories = new HashSet<string>
    {
        // Letters
        "Lu", "Ll", "Lt", "Lm", "Lo",

        // Marks
        "Mn", "Mc", "Me",

        // Numbers
        "Nd", "Nl", "No",

        // Punctuation
        "Pc", "Pd", "Ps", "Pe", "Pi", "Pf", "Po",

        // Symbols
        "Sm", "Sc", "Sk", "So",

        // Separators
        // With the exception of U+0020 SPACE (special-cased below), these aren't allowed

        // Other
        // We only allow one category of 'other' characters
        "Cf",
    };

    HashSet<string> seenCategories = new HashSet<string>();

    bool[] retVal = new bool[0x10000];

    // Dispose the reader (and with it the resource stream) once the lines have been read.
    string[] allLines;
    using (var reader = new StreamReader(typeof(UnicodeHelpersTests).GetTypeInfo().Assembly.GetManifestResourceStream("../../unicode/UnicodeData.txt")))
    {
        allLines = reader.ReadAllLines();
    }

    // UnicodeData.txt does not list every character of the large uniform ranges
    // (e.g. CJK ideographs, Hangul syllables). Such a range is given as two rows
    // whose names end in ", First>" and ", Last>"; every code point in between
    // shares the properties of those two rows.
    uint rangeFirstCodePoint = 0;

    foreach (string line in allLines)
    {
        if (line.Length == 0)
        {
            continue; // tolerate a blank (e.g. trailing) line
        }

        string[] splitLine = line.Split(';');
        uint codePoint = UInt32.Parse(splitLine[0], NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture);
        if (codePoint >= retVal.Length)
        {
            continue; // don't care about supplementary chars
        }

        if (codePoint == (uint)' ')
        {
            retVal[codePoint] = true; // we allow U+0020 SPACE as our only valid Zs (whitespace) char
            continue;
        }

        string category = splitLine[2];
        if (!allowedCategories.Contains(category))
        {
            continue;
        }

        retVal[codePoint] = true; // chars in this category are allowable
        seenCategories.Add(category);

        string name = splitLine[1];
        if (name.EndsWith(", First>", StringComparison.Ordinal))
        {
            rangeFirstCodePoint = codePoint;
        }
        else if (name.EndsWith(", Last>", StringComparison.Ordinal))
        {
            // Fill in the interior of the range; both end points are already marked.
            for (uint inRange = rangeFirstCodePoint; inRange < codePoint; inRange++)
            {
                retVal[inRange] = true;
            }
        }
    }

    // Finally, we need to make sure we've seen every category which contains
    // allowed characters. This provides extra defense against having a typo
    // in the list of categories.
    Assert.Equal(allowedCategories.OrderBy(c => c), seenCategories.OrderBy(c => c));

    return retVal;
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,302 @@
|
|||
// Copyright (c) Microsoft Open Technologies, Inc. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System;
|
||||
using System.Globalization;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using Xunit;
|
||||
|
||||
namespace Microsoft.Framework.WebEncoders
|
||||
{
|
||||
public class UrlEncoderTests
|
||||
{
|
||||
// Strict UTF-8 encoding (no BOM, throws on invalid data) used to compute known-good expected values.
private static readonly UTF8Encoding _utf8EncodingThrowOnInvalidBytes = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false, throwOnInvalidBytes: true);
|
||||
|
||||
[Fact]
public void Ctor_WithCodePointFilter()
{
    // Arrange: begin with nothing allowed, then opt individual characters in.
    var allowList = new CodePointFilter(UnicodeBlocks.None).AllowChars("ab").AllowChars('\0', '&', '\uFFFF', 'd');
    UrlEncoder sut = new UrlEncoder(allowList);

    // Act & assert: explicitly allowed, ordinary characters pass through untouched
    Assert.Equal("a", sut.UrlEncode("a"));
    Assert.Equal("b", sut.UrlEncode("b"));
    Assert.Equal("d", sut.UrlEncode("d"));

    // ... a character that was never allowed is percent-encoded
    Assert.Equal("%63", sut.UrlEncode("c"));

    // ... and these are percent-encoded even though the filter asked for them to be allowed
    Assert.Equal("%00", sut.UrlEncode("\0")); // control chars are always encoded
    Assert.Equal("%26", sut.UrlEncode("&")); // HTML-special chars are always encoded
    Assert.Equal("%EF%BF%BF", sut.UrlEncode("\uFFFF")); // non-chars and other forbidden chars are always encoded
}
|
||||
|
||||
[Fact]
public void Ctor_WithUnicodeBlocks()
{
    // Arrange: only Latin-1 Supplement and Miscellaneous Symbols are allowed
    UrlEncoder sut = new UrlEncoder(UnicodeBlocks.Latin1Supplement, UnicodeBlocks.MiscellaneousSymbols);

    // Act
    string basicLatin = sut.UrlEncode("a");
    string latin1 = sut.UrlEncode("\u00E9" /* LATIN SMALL LETTER E WITH ACUTE */);
    string miscSymbol = sut.UrlEncode("\u2601" /* CLOUD */);

    // Assert: 'a' is outside the allowed blocks, the other two are inside
    Assert.Equal("%61", basicLatin);
    Assert.Equal("\u00E9", latin1);
    Assert.Equal("\u2601", miscSymbol);
}
|
||||
|
||||
[Fact]
public void Ctor_WithNoParameters_DefaultsToBasicLatin()
{
    // Arrange
    UrlEncoder sut = new UrlEncoder();

    // Act
    string basicLatin = sut.UrlEncode("a");
    string latin1 = sut.UrlEncode("\u00E9" /* LATIN SMALL LETTER E WITH ACUTE */);
    string miscSymbol = sut.UrlEncode("\u2601" /* CLOUD */);

    // Assert: the Basic Latin letter passes through; the other two are percent-encoded as UTF-8
    Assert.Equal("a", basicLatin);
    Assert.Equal("%C3%A9", latin1);
    Assert.Equal("%E2%98%81", miscSymbol);
}
|
||||
|
||||
[Fact]
public void Default_EquivalentToBasicLatin()
{
    // Arrange
    UrlEncoder basicLatinEncoder = new UrlEncoder(UnicodeBlocks.BasicLatin);
    UrlEncoder defaultEncoder = UrlEncoder.Default;

    // Act & assert: both encoders must agree on every non-surrogate UTF-16 code unit
    for (int codeUnit = 0; codeUnit <= Char.MaxValue; codeUnit++)
    {
        if (IsSurrogateCodePoint(codeUnit))
        {
            continue;
        }

        string input = new String((char)codeUnit, 1);
        string expected = basicLatinEncoder.UrlEncode(input);
        string actual = defaultEncoder.UrlEncode(input);
        Assert.Equal(expected, actual);
    }
}
|
||||
|
||||
[Fact]
public void Default_ReturnsSingletonInstance()
{
    // Act: read the property twice
    UrlEncoder firstRead = UrlEncoder.Default;
    UrlEncoder secondRead = UrlEncoder.Default;

    // Assert: both reads must yield the same instance
    Assert.Same(firstRead, secondRead);
}
|
||||
|
||||
[Fact]
public void UrlEncode_AllRangesAllowed_StillEncodesForbiddenChars()
{
    // Arrange
    UrlEncoder sut = new UrlEncoder(UnicodeBlocks.All);

    // Act & assert - BMP chars, one UTF-16 code unit at a time
    for (int codePoint = 0; codePoint <= 0xFFFF; codePoint++)
    {
        string input = new String((char)codePoint, 1);
        string expected;

        if (IsSurrogateCodePoint(codePoint))
        {
            expected = "%EF%BF%BD"; // unpaired surrogate -> Unicode replacement char
        }
        else
        {
            // RFC 3987, Sec. 2.2 gives the list of allowed chars.
            // (We allow 'ipchar' except for "'", "&", "+", "%", and "=".)
            bool isAlphaOrDigit =
                ('a' <= codePoint && codePoint <= 'z')
                || ('A' <= codePoint && codePoint <= 'Z')
                || ('0' <= codePoint && codePoint <= '9');
            bool isUcsChar =
                (0x00A0 <= codePoint && codePoint <= 0xD7FF)
                || (0xF900 <= codePoint && codePoint <= 0xFDCF)
                || (0xFDF0 <= codePoint && codePoint <= 0xFFEF);

            bool mustEncode;
            if (isAlphaOrDigit)
            {
                mustEncode = false; // ALPHA / DIGIT
            }
            else if (isUcsChar)
            {
                mustEncode = !UnicodeHelpers.IsCharacterDefined((char)codePoint); // 'ucschar'
            }
            else
            {
                switch (codePoint)
                {
                    // iunreserved
                    case '-':
                    case '.':
                    case '_':
                    case '~':

                    // ipchar
                    case ':':
                    case '@':

                    // sub-delims
                    case '!':
                    case '$':
                    case '(':
                    case ')':
                    case '*':
                    case ',':
                    case ';':
                        mustEncode = false;
                        break;

                    default:
                        mustEncode = true;
                        break;
                }
            }

            expected = mustEncode
                ? GetKnownGoodPercentEncodedValue(codePoint)
                : input; // no encoding
        }

        Assert.Equal(expected, sut.UrlEncode(input));
    }

    // Act & assert - astral chars: every supplementary code point is expected to be percent-encoded
    for (int codePoint = 0x10000; codePoint <= 0x10FFFF; codePoint++)
    {
        string surrogatePair = Char.ConvertFromUtf32(codePoint);
        string expected = GetKnownGoodPercentEncodedValue(codePoint);

        Assert.Equal(expected, sut.UrlEncode(surrogatePair));
    }
}
|
||||
|
||||
[Fact]
public void UrlEncode_BadSurrogates_ReturnsUnicodeReplacementChar()
{
    // Arrange
    // Input layout:
    //   a <unpaired leading> b <unpaired trailing> c <trailing before leading>
    //   d <unpaired trailing> <valid pair> e <leading surrogate at end of string>
    const string input = "a\uD800b\uDFFFc\uDFFF\uD800d\uDFFF\uD800\uDFFFe\uD800";

    // Each unpaired surrogate shows up as %EF%BF%BD (UTF-8 for U+FFFD); the valid
    // pair 'D800' 'DFFF' is kept as one scalar and shows up as %F0%90%8F%BF.
    const string expected = "a%EF%BF%BDb%EF%BF%BDc%EF%BF%BD%EF%BF%BDd%EF%BF%BD%F0%90%8F%BFe%EF%BF%BD";
    UrlEncoder sut = new UrlEncoder(UnicodeBlocks.All); // allow all codepoints

    // Act
    string actual = sut.UrlEncode(input);

    // Assert
    Assert.Equal(expected, actual);
}
|
||||
|
||||
[Fact]
public void UrlEncode_EmptyStringInput_ReturnsEmptyString()
{
    // Arrange
    UrlEncoder sut = new UrlEncoder();

    // Act
    string actual = sut.UrlEncode("");

    // Assert
    Assert.Equal("", actual);
}
|
||||
|
||||
[Fact]
public void UrlEncode_InputDoesNotRequireEncoding_ReturnsOriginalStringInstance()
{
    // Arrange
    string input = "Hello,there!";
    UrlEncoder sut = new UrlEncoder();

    // Act
    string actual = sut.UrlEncode(input);

    // Assert: reference equality - nothing needed escaping, so no new string should be produced
    Assert.Same(input, actual);
}
|
||||
|
||||
[Fact]
public void UrlEncode_NullInput_ReturnsNull()
{
    // Arrange
    UrlEncoder sut = new UrlEncoder();

    // Act
    string actual = sut.UrlEncode(null);

    // Assert
    Assert.Null(actual);
}
|
||||
|
||||
[Fact]
public void UrlEncode_WithCharsRequiringEncodingAtBeginning()
{
    // Arrange
    var sut = new UrlEncoder();

    // Act
    string actual = sut.UrlEncode("&Hello,there!");

    // Assert: only the leading '&' is percent-encoded
    Assert.Equal(@"%26Hello,there!", actual);
}
|
||||
|
||||
[Fact]
public void UrlEncode_WithCharsRequiringEncodingAtEnd()
{
    // Arrange
    var sut = new UrlEncoder();

    // Act
    string actual = sut.UrlEncode("Hello,there!&");

    // Assert: only the trailing '&' is percent-encoded
    Assert.Equal(@"Hello,there!%26", actual);
}
|
||||
|
||||
[Fact]
public void UrlEncode_WithCharsRequiringEncodingInMiddle()
{
    // Arrange
    var sut = new UrlEncoder();

    // Act
    string actual = sut.UrlEncode("Hello, &there!");

    // Assert: the space and the '&' are percent-encoded, everything else is untouched
    Assert.Equal(@"Hello,%20%26there!", actual);
}
|
||||
|
||||
[Fact]
public void UrlEncode_WithCharsRequiringEncodingInterspersed()
{
    // Arrange
    var sut = new UrlEncoder();

    // Act
    string actual = sut.UrlEncode("Hello, <there>!");

    // Assert: the space and both angle brackets are percent-encoded
    Assert.Equal(@"Hello,%20%3Cthere%3E!", actual);
}
|
||||
|
||||
[Fact]
public void UrlEncode_CharArray()
{
    // Arrange
    char[] buffer = "Hello+world!".ToCharArray();
    var sink = new StringWriter();
    UrlEncoder sut = new UrlEncoder();

    // Act: encode the five-char window starting at index 3 ("lo+wo")
    sut.UrlEncode(buffer, 3, 5, sink);

    // Assert: only the '+' inside the window is percent-encoded
    Assert.Equal("lo%2Bwo", sink.ToString());
}
|
||||
|
||||
[Fact]
public void UrlEncode_StringSubstring()
{
    // Arrange
    var sink = new StringWriter();
    UrlEncoder sut = new UrlEncoder();

    // Act: encode the five-char window starting at index 3 ("lo+wo")
    sut.UrlEncode("Hello+world!", 3, 5, sink);

    // Assert: only the '+' inside the window is percent-encoded
    Assert.Equal("lo%2Bwo", sink.ToString());
}
|
||||
|
||||
[Fact]
public void UrlEncode_DoesNotOutputHtmlSensitiveCharacters()
{
    // Per the design document, we provide additional defense-in-depth
    // by never emitting HTML-sensitive characters unescaped.

    // Arrange
    HtmlEncoder htmlEncoder = new HtmlEncoder(UnicodeBlocks.All);
    UrlEncoder urlEncoder = new UrlEncoder(UnicodeBlocks.All);

    // Act & assert
    for (int codePoint = 0; codePoint <= 0x10FFFF; codePoint++)
    {
        if (!IsSurrogateCodePoint(codePoint)) // surrogates don't matter here
        {
            string urlEncoded = urlEncoder.UrlEncode(Char.ConvertFromUtf32(codePoint));
            string htmlEncodedAfterwards = htmlEncoder.HtmlEncode(urlEncoded);

            // If HTML-encoding changes nothing, the URL-encoded text held no HTML-sensitive characters.
            Assert.Equal(urlEncoded, htmlEncodedAfterwards);
        }
    }
}
|
||||
|
||||
// Reference implementation of percent-encoding for a single code point:
// code point -> UTF-16 string -> strict UTF-8 bytes -> "%XX" per byte (upper-case hex).
private static string GetKnownGoodPercentEncodedValue(int codePoint)
{
    string asUtf16 = Char.ConvertFromUtf32(codePoint);
    byte[] utf8Bytes = _utf8EncodingThrowOnInvalidBytes.GetBytes(asUtf16);

    var percentEncoded = new StringBuilder(utf8Bytes.Length * 3);
    foreach (byte utf8Byte in utf8Bytes)
    {
        percentEncoded.Append('%').Append(utf8Byte.ToString("X2", CultureInfo.InvariantCulture));
    }

    return percentEncoded.ToString();
}
|
||||
|
||||
// Returns true when codePoint lies in the UTF-16 surrogate range U+D800..U+DFFF.
private static bool IsSurrogateCodePoint(int codePoint)
{
    // The surrogate range is exactly the 0x800 values whose bits above the low 11 equal 0xD800.
    return (codePoint & ~0x7FF) == 0xD800;
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,18 @@
|
|||
{
|
||||
"dependencies": {
|
||||
"Microsoft.Framework.WebEncoders": "1.0.0-*",
|
||||
"Moq": "4.2.1312.1622",
|
||||
"Newtonsoft.Json": "6.0.6",
|
||||
"xunit.runner.kre": "1.0.0-*"
|
||||
},
|
||||
"commands": {
|
||||
"test": "xunit.runner.kre"
|
||||
},
|
||||
"compilationOptions": {
|
||||
"allowUnsafe": true
|
||||
},
|
||||
"frameworks": {
|
||||
"aspnet50": { }
|
||||
},
|
||||
"resources": "..\\..\\unicode\\UnicodeData.txt"
|
||||
}
|
||||
|
|
@ -0,0 +1,283 @@
|
|||
# Blocks-7.0.0.txt
|
||||
# Date: 2014-04-03, 23:23:00 GMT [RP, KW]
|
||||
#
|
||||
# Unicode Character Database
|
||||
# Copyright (c) 1991-2014 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
#
|
||||
# Note: The casing of block names is not normative.
|
||||
# For example, "Basic Latin" and "BASIC LATIN" are equivalent.
|
||||
#
|
||||
# Format:
|
||||
# Start Code..End Code; Block Name
|
||||
|
||||
# ================================================
|
||||
|
||||
# Note: When comparing block names, casing, whitespace, hyphens,
|
||||
# and underbars are ignored.
|
||||
# For example, "Latin Extended-A" and "latin extended a" are equivalent.
|
||||
# For more information on the comparison of property values,
|
||||
# see UAX #44: http://www.unicode.org/reports/tr44/
|
||||
#
|
||||
# All code points not explicitly listed for Block
|
||||
# have the value No_Block.
|
||||
|
||||
# Property: Block
|
||||
#
|
||||
# @missing: 0000..10FFFF; No_Block
|
||||
|
||||
0000..007F; Basic Latin
|
||||
0080..00FF; Latin-1 Supplement
|
||||
0100..017F; Latin Extended-A
|
||||
0180..024F; Latin Extended-B
|
||||
0250..02AF; IPA Extensions
|
||||
02B0..02FF; Spacing Modifier Letters
|
||||
0300..036F; Combining Diacritical Marks
|
||||
0370..03FF; Greek and Coptic
|
||||
0400..04FF; Cyrillic
|
||||
0500..052F; Cyrillic Supplement
|
||||
0530..058F; Armenian
|
||||
0590..05FF; Hebrew
|
||||
0600..06FF; Arabic
|
||||
0700..074F; Syriac
|
||||
0750..077F; Arabic Supplement
|
||||
0780..07BF; Thaana
|
||||
07C0..07FF; NKo
|
||||
0800..083F; Samaritan
|
||||
0840..085F; Mandaic
|
||||
08A0..08FF; Arabic Extended-A
|
||||
0900..097F; Devanagari
|
||||
0980..09FF; Bengali
|
||||
0A00..0A7F; Gurmukhi
|
||||
0A80..0AFF; Gujarati
|
||||
0B00..0B7F; Oriya
|
||||
0B80..0BFF; Tamil
|
||||
0C00..0C7F; Telugu
|
||||
0C80..0CFF; Kannada
|
||||
0D00..0D7F; Malayalam
|
||||
0D80..0DFF; Sinhala
|
||||
0E00..0E7F; Thai
|
||||
0E80..0EFF; Lao
|
||||
0F00..0FFF; Tibetan
|
||||
1000..109F; Myanmar
|
||||
10A0..10FF; Georgian
|
||||
1100..11FF; Hangul Jamo
|
||||
1200..137F; Ethiopic
|
||||
1380..139F; Ethiopic Supplement
|
||||
13A0..13FF; Cherokee
|
||||
1400..167F; Unified Canadian Aboriginal Syllabics
|
||||
1680..169F; Ogham
|
||||
16A0..16FF; Runic
|
||||
1700..171F; Tagalog
|
||||
1720..173F; Hanunoo
|
||||
1740..175F; Buhid
|
||||
1760..177F; Tagbanwa
|
||||
1780..17FF; Khmer
|
||||
1800..18AF; Mongolian
|
||||
18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
|
||||
1900..194F; Limbu
|
||||
1950..197F; Tai Le
|
||||
1980..19DF; New Tai Lue
|
||||
19E0..19FF; Khmer Symbols
|
||||
1A00..1A1F; Buginese
|
||||
1A20..1AAF; Tai Tham
|
||||
1AB0..1AFF; Combining Diacritical Marks Extended
|
||||
1B00..1B7F; Balinese
|
||||
1B80..1BBF; Sundanese
|
||||
1BC0..1BFF; Batak
|
||||
1C00..1C4F; Lepcha
|
||||
1C50..1C7F; Ol Chiki
|
||||
1CC0..1CCF; Sundanese Supplement
|
||||
1CD0..1CFF; Vedic Extensions
|
||||
1D00..1D7F; Phonetic Extensions
|
||||
1D80..1DBF; Phonetic Extensions Supplement
|
||||
1DC0..1DFF; Combining Diacritical Marks Supplement
|
||||
1E00..1EFF; Latin Extended Additional
|
||||
1F00..1FFF; Greek Extended
|
||||
2000..206F; General Punctuation
|
||||
2070..209F; Superscripts and Subscripts
|
||||
20A0..20CF; Currency Symbols
|
||||
20D0..20FF; Combining Diacritical Marks for Symbols
|
||||
2100..214F; Letterlike Symbols
|
||||
2150..218F; Number Forms
|
||||
2190..21FF; Arrows
|
||||
2200..22FF; Mathematical Operators
|
||||
2300..23FF; Miscellaneous Technical
|
||||
2400..243F; Control Pictures
|
||||
2440..245F; Optical Character Recognition
|
||||
2460..24FF; Enclosed Alphanumerics
|
||||
2500..257F; Box Drawing
|
||||
2580..259F; Block Elements
|
||||
25A0..25FF; Geometric Shapes
|
||||
2600..26FF; Miscellaneous Symbols
|
||||
2700..27BF; Dingbats
|
||||
27C0..27EF; Miscellaneous Mathematical Symbols-A
|
||||
27F0..27FF; Supplemental Arrows-A
|
||||
2800..28FF; Braille Patterns
|
||||
2900..297F; Supplemental Arrows-B
|
||||
2980..29FF; Miscellaneous Mathematical Symbols-B
|
||||
2A00..2AFF; Supplemental Mathematical Operators
|
||||
2B00..2BFF; Miscellaneous Symbols and Arrows
|
||||
2C00..2C5F; Glagolitic
|
||||
2C60..2C7F; Latin Extended-C
|
||||
2C80..2CFF; Coptic
|
||||
2D00..2D2F; Georgian Supplement
|
||||
2D30..2D7F; Tifinagh
|
||||
2D80..2DDF; Ethiopic Extended
|
||||
2DE0..2DFF; Cyrillic Extended-A
|
||||
2E00..2E7F; Supplemental Punctuation
|
||||
2E80..2EFF; CJK Radicals Supplement
|
||||
2F00..2FDF; Kangxi Radicals
|
||||
2FF0..2FFF; Ideographic Description Characters
|
||||
3000..303F; CJK Symbols and Punctuation
|
||||
3040..309F; Hiragana
|
||||
30A0..30FF; Katakana
|
||||
3100..312F; Bopomofo
|
||||
3130..318F; Hangul Compatibility Jamo
|
||||
3190..319F; Kanbun
|
||||
31A0..31BF; Bopomofo Extended
|
||||
31C0..31EF; CJK Strokes
|
||||
31F0..31FF; Katakana Phonetic Extensions
|
||||
3200..32FF; Enclosed CJK Letters and Months
|
||||
3300..33FF; CJK Compatibility
|
||||
3400..4DBF; CJK Unified Ideographs Extension A
|
||||
4DC0..4DFF; Yijing Hexagram Symbols
|
||||
4E00..9FFF; CJK Unified Ideographs
|
||||
A000..A48F; Yi Syllables
|
||||
A490..A4CF; Yi Radicals
|
||||
A4D0..A4FF; Lisu
|
||||
A500..A63F; Vai
|
||||
A640..A69F; Cyrillic Extended-B
|
||||
A6A0..A6FF; Bamum
|
||||
A700..A71F; Modifier Tone Letters
|
||||
A720..A7FF; Latin Extended-D
|
||||
A800..A82F; Syloti Nagri
|
||||
A830..A83F; Common Indic Number Forms
|
||||
A840..A87F; Phags-pa
|
||||
A880..A8DF; Saurashtra
|
||||
A8E0..A8FF; Devanagari Extended
|
||||
A900..A92F; Kayah Li
|
||||
A930..A95F; Rejang
|
||||
A960..A97F; Hangul Jamo Extended-A
|
||||
A980..A9DF; Javanese
|
||||
A9E0..A9FF; Myanmar Extended-B
|
||||
AA00..AA5F; Cham
|
||||
AA60..AA7F; Myanmar Extended-A
|
||||
AA80..AADF; Tai Viet
|
||||
AAE0..AAFF; Meetei Mayek Extensions
|
||||
AB00..AB2F; Ethiopic Extended-A
|
||||
AB30..AB6F; Latin Extended-E
|
||||
ABC0..ABFF; Meetei Mayek
|
||||
AC00..D7AF; Hangul Syllables
|
||||
D7B0..D7FF; Hangul Jamo Extended-B
|
||||
D800..DB7F; High Surrogates
|
||||
DB80..DBFF; High Private Use Surrogates
|
||||
DC00..DFFF; Low Surrogates
|
||||
E000..F8FF; Private Use Area
|
||||
F900..FAFF; CJK Compatibility Ideographs
|
||||
FB00..FB4F; Alphabetic Presentation Forms
|
||||
FB50..FDFF; Arabic Presentation Forms-A
|
||||
FE00..FE0F; Variation Selectors
|
||||
FE10..FE1F; Vertical Forms
|
||||
FE20..FE2F; Combining Half Marks
|
||||
FE30..FE4F; CJK Compatibility Forms
|
||||
FE50..FE6F; Small Form Variants
|
||||
FE70..FEFF; Arabic Presentation Forms-B
|
||||
FF00..FFEF; Halfwidth and Fullwidth Forms
|
||||
FFF0..FFFF; Specials
|
||||
10000..1007F; Linear B Syllabary
|
||||
10080..100FF; Linear B Ideograms
|
||||
10100..1013F; Aegean Numbers
|
||||
10140..1018F; Ancient Greek Numbers
|
||||
10190..101CF; Ancient Symbols
|
||||
101D0..101FF; Phaistos Disc
|
||||
10280..1029F; Lycian
|
||||
102A0..102DF; Carian
|
||||
102E0..102FF; Coptic Epact Numbers
|
||||
10300..1032F; Old Italic
|
||||
10330..1034F; Gothic
|
||||
10350..1037F; Old Permic
|
||||
10380..1039F; Ugaritic
|
||||
103A0..103DF; Old Persian
|
||||
10400..1044F; Deseret
|
||||
10450..1047F; Shavian
|
||||
10480..104AF; Osmanya
|
||||
10500..1052F; Elbasan
|
||||
10530..1056F; Caucasian Albanian
|
||||
10600..1077F; Linear A
|
||||
10800..1083F; Cypriot Syllabary
|
||||
10840..1085F; Imperial Aramaic
|
||||
10860..1087F; Palmyrene
|
||||
10880..108AF; Nabataean
|
||||
10900..1091F; Phoenician
|
||||
10920..1093F; Lydian
|
||||
10980..1099F; Meroitic Hieroglyphs
|
||||
109A0..109FF; Meroitic Cursive
|
||||
10A00..10A5F; Kharoshthi
|
||||
10A60..10A7F; Old South Arabian
|
||||
10A80..10A9F; Old North Arabian
|
||||
10AC0..10AFF; Manichaean
|
||||
10B00..10B3F; Avestan
|
||||
10B40..10B5F; Inscriptional Parthian
|
||||
10B60..10B7F; Inscriptional Pahlavi
|
||||
10B80..10BAF; Psalter Pahlavi
|
||||
10C00..10C4F; Old Turkic
|
||||
10E60..10E7F; Rumi Numeral Symbols
|
||||
11000..1107F; Brahmi
|
||||
11080..110CF; Kaithi
|
||||
110D0..110FF; Sora Sompeng
|
||||
11100..1114F; Chakma
|
||||
11150..1117F; Mahajani
|
||||
11180..111DF; Sharada
|
||||
111E0..111FF; Sinhala Archaic Numbers
|
||||
11200..1124F; Khojki
|
||||
112B0..112FF; Khudawadi
|
||||
11300..1137F; Grantha
|
||||
11480..114DF; Tirhuta
|
||||
11580..115FF; Siddham
|
||||
11600..1165F; Modi
|
||||
11680..116CF; Takri
|
||||
118A0..118FF; Warang Citi
|
||||
11AC0..11AFF; Pau Cin Hau
|
||||
12000..123FF; Cuneiform
|
||||
12400..1247F; Cuneiform Numbers and Punctuation
|
||||
13000..1342F; Egyptian Hieroglyphs
|
||||
16800..16A3F; Bamum Supplement
|
||||
16A40..16A6F; Mro
|
||||
16AD0..16AFF; Bassa Vah
|
||||
16B00..16B8F; Pahawh Hmong
|
||||
16F00..16F9F; Miao
|
||||
1B000..1B0FF; Kana Supplement
|
||||
1BC00..1BC9F; Duployan
|
||||
1BCA0..1BCAF; Shorthand Format Controls
|
||||
1D000..1D0FF; Byzantine Musical Symbols
|
||||
1D100..1D1FF; Musical Symbols
|
||||
1D200..1D24F; Ancient Greek Musical Notation
|
||||
1D300..1D35F; Tai Xuan Jing Symbols
|
||||
1D360..1D37F; Counting Rod Numerals
|
||||
1D400..1D7FF; Mathematical Alphanumeric Symbols
|
||||
1E800..1E8DF; Mende Kikakui
|
||||
1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
|
||||
1F000..1F02F; Mahjong Tiles
|
||||
1F030..1F09F; Domino Tiles
|
||||
1F0A0..1F0FF; Playing Cards
|
||||
1F100..1F1FF; Enclosed Alphanumeric Supplement
|
||||
1F200..1F2FF; Enclosed Ideographic Supplement
|
||||
1F300..1F5FF; Miscellaneous Symbols and Pictographs
|
||||
1F600..1F64F; Emoticons
|
||||
1F650..1F67F; Ornamental Dingbats
|
||||
1F680..1F6FF; Transport and Map Symbols
|
||||
1F700..1F77F; Alchemical Symbols
|
||||
1F780..1F7FF; Geometric Shapes Extended
|
||||
1F800..1F8FF; Supplemental Arrows-C
|
||||
20000..2A6DF; CJK Unified Ideographs Extension B
|
||||
2A700..2B73F; CJK Unified Ideographs Extension C
|
||||
2B740..2B81F; CJK Unified Ideographs Extension D
|
||||
2F800..2FA1F; CJK Compatibility Ideographs Supplement
|
||||
E0000..E007F; Tags
|
||||
E0100..E01EF; Variation Selectors Supplement
|
||||
F0000..FFFFF; Supplementary Private Use Area-A
|
||||
100000..10FFFF; Supplementary Private Use Area-B
|
||||
|
||||
# EOF
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
<?xml version="1.0" encoding="utf-8" ?>
|
||||
<configuration>
|
||||
<startup>
|
||||
<supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.5.1" />
|
||||
</startup>
|
||||
</configuration>
|
||||
|
|
@ -0,0 +1,61 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
|
||||
<PropertyGroup>
|
||||
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
|
||||
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
|
||||
<ProjectGuid>{0E87CEC9-46CE-4B6B-A613-93AA773C10A4}</ProjectGuid>
|
||||
<OutputType>Exe</OutputType>
|
||||
<AppDesignerFolder>Properties</AppDesignerFolder>
|
||||
<RootNamespace>DefinedCharListGenerator</RootNamespace>
|
||||
<AssemblyName>DefinedCharListGenerator</AssemblyName>
|
||||
<TargetFrameworkVersion>v4.5.1</TargetFrameworkVersion>
|
||||
<FileAlignment>512</FileAlignment>
|
||||
<AutoGenerateBindingRedirects>true</AutoGenerateBindingRedirects>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
|
||||
<PlatformTarget>AnyCPU</PlatformTarget>
|
||||
<DebugSymbols>true</DebugSymbols>
|
||||
<DebugType>full</DebugType>
|
||||
<Optimize>false</Optimize>
|
||||
<OutputPath>bin\Debug\</OutputPath>
|
||||
<DefineConstants>DEBUG;TRACE</DefineConstants>
|
||||
<ErrorReport>prompt</ErrorReport>
|
||||
<WarningLevel>4</WarningLevel>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
|
||||
<PlatformTarget>AnyCPU</PlatformTarget>
|
||||
<DebugType>pdbonly</DebugType>
|
||||
<Optimize>true</Optimize>
|
||||
<OutputPath>bin\Release\</OutputPath>
|
||||
<DefineConstants>TRACE</DefineConstants>
|
||||
<ErrorReport>prompt</ErrorReport>
|
||||
<WarningLevel>4</WarningLevel>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<Reference Include="System" />
|
||||
<Reference Include="System.Core" />
|
||||
<Reference Include="Microsoft.CSharp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Compile Include="Program.cs" />
|
||||
<Compile Include="Properties\AssemblyInfo.cs" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="App.config" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Content Include="..\..\UnicodeData.txt">
|
||||
<Link>UnicodeData.txt</Link>
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</Content>
|
||||
</ItemGroup>
|
||||
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
|
||||
<!-- To modify your build process, add your task inside one of the targets below and uncomment it.
|
||||
Other similar extension points exist, see Microsoft.Common.targets.
|
||||
<Target Name="BeforeBuild">
|
||||
</Target>
|
||||
<Target Name="AfterBuild">
|
||||
</Target>
|
||||
-->
|
||||
</Project>
|
||||
|
|
@ -0,0 +1,107 @@
|
|||
using System;
|
||||
using System.Diagnostics;
|
||||
using System.Globalization;
|
||||
using System.IO;
|
||||
|
||||
namespace DefinedCharListGenerator
{
    /// <summary>
    /// This program outputs the 'unicode-defined-chars.bin' bitmap file.
    /// </summary>
    /// <remarks>
    /// It reads 'UnicodeData.txt' from the current directory and writes a bitmap covering
    /// U+0000..U+FFFF. A set bit means the code point is listed in the input with one of
    /// the allowed general categories (or is U+0020 SPACE).
    /// </remarks>
    class Program
    {
        // Unicode range is U+0000 .. U+10FFFF
        private const uint MaxUnicodeChar = 0x10FFFF;

        // We're only concerned about the BMP (U+0000 .. U+FFFF) for now.
        private const int BmpCodePointCount = 0x10000;

        // UnicodeData.txt does not list every code point of large blocks individually.
        // Instead it emits a pair of lines whose name fields look like
        // "<CJK Ideograph, First>" and "<CJK Ideograph, Last>"; every code point between
        // the two (inclusive) shares the properties given on those lines.
        private const string RangeFirstSuffix = ", First>";
        private const string RangeLastSuffix = ", Last>";

        /// <summary>
        /// Entry point: converts 'UnicodeData.txt' into 'unicode-defined-chars.bin'.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // The input file should be UnicodeData.txt from the UCD corresponding to the
            // version of the Unicode spec we're consuming.
            // More info: http://www.unicode.org/reports/tr44/tr44-14.html#UCD_Files
            // Latest UnicodeData.txt: http://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
            bool[] definedChars = ReadDefinedChars(File.ReadAllLines("UnicodeData.txt"));

            File.WriteAllBytes("unicode-defined-chars.bin", BuildBmpBitmap(definedChars));
        }

        /// <summary>
        /// Parses the lines of UnicodeData.txt and returns a table, indexed by code point,
        /// that is true for every allowed code point.
        /// </summary>
        /// <param name="allLines">The raw lines of UnicodeData.txt.</param>
        /// <returns>An array of <c>MaxUnicodeChar + 1</c> flags.</returns>
        private static bool[] ReadDefinedChars(string[] allLines)
        {
            bool[] definedChars = new bool[MaxUnicodeChar + 1];

            // Start of a "<..., First>" range whose "<..., Last>" line has not been seen yet.
            uint? pendingRangeStart = null;

            // Each line is a semicolon-delimited list of information:
            // <value>;<name>;<category>;...
            foreach (string line in allLines)
            {
                if (String.IsNullOrWhiteSpace(line))
                {
                    continue; // tolerate blank lines (e.g. a trailing newline)
                }

                string[] splitLine = line.Split(new char[] { ';' }, 4);
                uint codepoint = uint.Parse(splitLine[0], NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture);
                string name = splitLine[1];
                string category = splitLine[2];

                // Work out which span of code points this line describes. A normal line
                // describes only itself; a "Last" line that follows a "First" line
                // describes the whole range between them.
                uint firstInSpan = codepoint;
                if (name.EndsWith(RangeFirstSuffix, StringComparison.Ordinal))
                {
                    pendingRangeStart = codepoint;
                }
                else
                {
                    if (pendingRangeStart.HasValue && name.EndsWith(RangeLastSuffix, StringComparison.Ordinal))
                    {
                        firstInSpan = Math.Min(pendingRangeStart.Value, codepoint);
                    }
                    pendingRangeStart = null;
                }

                // We only allow certain categories of code points.
                // Zs (space separators) aren't included, but we allow U+0020 SPACE as a special case
                if (!(codepoint == (uint)' ' || IsAllowedUnicodeCategory(category)))
                {
                    continue;
                }

                Debug.Assert(codepoint <= MaxUnicodeChar);
                for (uint current = firstInSpan; current <= codepoint; current++)
                {
                    definedChars[current] = true;
                }
            }

            return definedChars;
        }

        /// <summary>
        /// Packs the BMP portion of the defined-character table into a bitmap.
        /// </summary>
        /// <param name="definedChars">Flags indexed by code point; must cover at least the BMP.</param>
        /// <returns>A byte array with one bit per BMP code point.</returns>
        private static byte[] BuildBmpBitmap(bool[] definedChars)
        {
            // Each consecutive block of 8 chars is written as a single byte.
            // For instance, the first byte of the output file contains the
            // bitmap for the following codepoints:
            // - (bit 7) U+0007 [MSB]
            // - (bit 6) U+0006
            // - (bit 5) U+0005
            // - (bit 4) U+0004
            // - (bit 3) U+0003
            // - (bit 2) U+0002
            // - (bit 1) U+0001
            // - (bit 0) U+0000 [LSB]
            // The next byte will contain the bitmap for U+000F to U+0008,
            // and so on until the last byte, which is U+FFFF to U+FFF8.
            byte[] bitmap = new byte[BmpCodePointCount / 8];
            for (int codepoint = 0; codepoint < BmpCodePointCount; codepoint++)
            {
                if (definedChars[codepoint])
                {
                    bitmap[codepoint / 8] |= (byte)(1 << (codepoint % 8));
                }
            }

            return bitmap;
        }

        /// <summary>
        /// Determines whether a two-letter Unicode general category is one we allow.
        /// </summary>
        /// <param name="category">The general category field from UnicodeData.txt.</param>
        /// <returns>true for the allowed letter, mark, number, punctuation, symbol and format categories.</returns>
        private static bool IsAllowedUnicodeCategory(string category)
        {
            // We only allow certain classes of characters
            switch (category)
            {
                case "Lu": /* letters */
                case "Ll":
                case "Lt":
                case "Lm":
                case "Lo":
                case "Mn": /* marks */
                case "Mc":
                case "Me":
                case "Nd": /* numbers */
                case "Nl":
                case "No":
                case "Pc": /* punctuation */
                case "Pd":
                case "Ps":
                case "Pe":
                case "Pi":
                case "Pf":
                case "Po":
                case "Sm": /* symbols */
                case "Sc":
                case "Sk":
                case "So":
                case "Cf": /* other */
                    return true;

                default:
                    return false;
            }
        }
    }
}
|
||||
|
|
@ -0,0 +1,36 @@
|
|||
using System.Reflection;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
// General Information about an assembly is controlled through the following
|
||||
// set of attributes. Change these attribute values to modify the information
|
||||
// associated with an assembly.
|
||||
[assembly: AssemblyTitle("DefinedCharListGenerator")]
|
||||
[assembly: AssemblyDescription("")]
|
||||
[assembly: AssemblyConfiguration("")]
|
||||
[assembly: AssemblyCompany("")]
|
||||
[assembly: AssemblyProduct("DefinedCharListGenerator")]
|
||||
[assembly: AssemblyCopyright("Copyright © 2015")]
|
||||
[assembly: AssemblyTrademark("")]
|
||||
[assembly: AssemblyCulture("")]
|
||||
|
||||
// Setting ComVisible to false makes the types in this assembly not visible
|
||||
// to COM components. If you need to access a type in this assembly from
|
||||
// COM, set the ComVisible attribute to true on that type.
|
||||
[assembly: ComVisible(false)]
|
||||
|
||||
// The following GUID is for the ID of the typelib if this project is exposed to COM
|
||||
[assembly: Guid("5089f890-38f7-413c-87b0-d8eb1e238ef5")]
|
||||
|
||||
// Version information for an assembly consists of the following four values:
|
||||
//
|
||||
// Major Version
|
||||
// Minor Version
|
||||
// Build Number
|
||||
// Revision
|
||||
//
|
||||
// You can specify all the values or you can default the Build and Revision Numbers
|
||||
// by using the '*' as shown below:
|
||||
// [assembly: AssemblyVersion("1.0.*")]
|
||||
[assembly: AssemblyVersion("1.0.0.0")]
|
||||
[assembly: AssemblyFileVersion("1.0.0.0")]
|
||||
|
|
@ -0,0 +1,28 @@
|
|||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio 2013
|
||||
VisualStudioVersion = 12.0.31101.0
|
||||
MinimumVisualStudioVersion = 10.0.40219.1
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DefinedCharListGenerator", "DefinedCharListGenerator\DefinedCharListGenerator.csproj", "{0E87CEC9-46CE-4B6B-A613-93AA773C10A4}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "UnicodeTablesGenerator", "UnicodeTablesGenerator\UnicodeTablesGenerator.csproj", "{3D181114-6946-4D34-A3B9-0F83B6B8FEAE}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Any CPU = Debug|Any CPU
|
||||
Release|Any CPU = Release|Any CPU
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{0E87CEC9-46CE-4B6B-A613-93AA773C10A4}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{0E87CEC9-46CE-4B6B-A613-93AA773C10A4}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{0E87CEC9-46CE-4B6B-A613-93AA773C10A4}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{0E87CEC9-46CE-4B6B-A613-93AA773C10A4}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{3D181114-6946-4D34-A3B9-0F83B6B8FEAE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{3D181114-6946-4D34-A3B9-0F83B6B8FEAE}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{3D181114-6946-4D34-A3B9-0F83B6B8FEAE}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{3D181114-6946-4D34-A3B9-0F83B6B8FEAE}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
EndGlobal
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
<?xml version="1.0" encoding="utf-8" ?>
|
||||
<configuration>
|
||||
<startup>
|
||||
<supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.5.1" />
|
||||
</startup>
|
||||
</configuration>
|
||||
|
|
@ -0,0 +1,109 @@
|
|||
using System;
|
||||
using System.Globalization;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace UnicodeTablesGenerator
|
||||
{
|
||||
/// <summary>
|
||||
/// This program outputs the 'UnicodeBlocks.generated.txt' and
|
||||
/// 'UnicodeBlocksTests.generated.txt' source files.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// The generated files require some hand-tweaking. For instance, you'll need
|
||||
/// to remove surrogates and private use blocks. The files can then be merged
|
||||
/// into the *.generated.cs files as appropriate.
|
||||
/// </remarks>
|
||||
class Program
|
||||
{
|
||||
private const string _codePointFiltersGeneratedFormat = @"
|
||||
/// <summary>
|
||||
/// Represents the '{0}' Unicode block (U+{1}..U+{2}).
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// See http://www.unicode.org/charts/PDF/U{1}.pdf for the full set of characters in this block.
|
||||
/// </remarks>
|
||||
public static UnicodeBlock {3}
|
||||
{{
|
||||
get
|
||||
{{
|
||||
return Volatile.Read(ref _{4}) ?? CreateBlock(ref _{4}, first: '\u{1}', last: '\u{2}');
|
||||
}}
|
||||
}}
|
||||
private static UnicodeBlock _{4};
|
||||
";
|
||||
|
||||
private const string _codePointFiltersTestsGeneratedFormat = @"[InlineData('\u{1}', '\u{2}', nameof(UnicodeBlocks.{0}))]";
|
||||
|
||||
// Reads 'Blocks.txt' from the current directory and writes two text files:
// 'UnicodeBlocks.generated.txt' (one property/field snippet per block) and
// 'UnicodeBlocksTests.generated.txt' (one [InlineData] line per block).
private static void Main()
{
    // The input file should be Blocks.txt from the UCD corresponding to the
    // version of the Unicode spec we're consuming.
    // More info: http://www.unicode.org/reports/tr44/
    // Latest Blocks.txt: http://www.unicode.org/Public/UCD/latest/ucd/Blocks.txt

    StringBuilder runtimeSource = new StringBuilder();
    StringBuilder testSource = new StringBuilder();
    string[] blockLines = File.ReadAllLines("Blocks.txt");

    // We only care about lines of the form "XXXX..XXXX; Block name". The pattern is
    // anchored, so comments, blank lines and ranges whose codes are not exactly four
    // hex digits simply fail to match and are ignored.
    Regex blockLinePattern = new Regex(@"^(?<startCode>[0-9A-F]{4})\.\.(?<endCode>[0-9A-F]{4}); (?<blockName>.+)$");

    foreach (string blockLine in blockLines)
    {
        Match parsed = blockLinePattern.Match(blockLine);
        if (parsed.Success)
        {
            string firstCode = parsed.Groups["startCode"].Value;
            string lastCode = parsed.Groups["endCode"].Value;
            string displayName = parsed.Groups["blockName"].Value;
            string propertyName = RemoveAllNonAlphanumeric(displayName);
            string fieldName = WithDotNetFieldCasing(propertyName);

            runtimeSource.AppendFormat(
                CultureInfo.InvariantCulture,
                _codePointFiltersGeneratedFormat,
                displayName, firstCode, lastCode, propertyName, fieldName);

            testSource
                .AppendFormat(
                    CultureInfo.InvariantCulture,
                    _codePointFiltersTestsGeneratedFormat,
                    propertyName, firstCode, lastCode)
                .AppendLine();
        }
    }

    File.WriteAllText("UnicodeBlocks.generated.txt", runtimeSource.ToString());
    File.WriteAllText("UnicodeBlocksTests.generated.txt", testSource.ToString());
}
|
||||
|
||||
// Strips every character that is not an ASCII letter (A-Z, a-z) or ASCII digit (0-9),
// turning a block name such as "Latin-1 Supplement" into an identifier-friendly string.
private static string RemoveAllNonAlphanumeric(string blockName)
{
    StringBuilder kept = new StringBuilder(blockName.Length);
    foreach (char candidate in blockName)
    {
        bool isUpperLetter = candidate >= 'A' && candidate <= 'Z';
        bool isLowerLetter = candidate >= 'a' && candidate <= 'z';
        bool isDigit = candidate >= '0' && candidate <= '9';

        if (isUpperLetter || isLowerLetter || isDigit)
        {
            kept.Append(candidate);
        }
    }

    return kept.ToString();
}
|
||||
|
||||
// Converts a PascalCase property name into the casing used for the backing field name:
// the leading run of non-lowercase characters is lower-cased, except that the last
// character of that run keeps its original casing when it starts the next word
// (e.g. "CJKUnifiedIdeographs" becomes "cjkUnifiedIdeographs", "BasicLatin" becomes "basicLatin").
private static string WithDotNetFieldCasing(string input)
{
    // Locate the first lowercase character; everything before it is the leading run.
    int firstLowerIndex = 0;
    while (firstLowerIndex < input.Length && !Char.IsLower(input[firstLowerIndex]))
    {
        firstLowerIndex++;
    }

    // The whole leading run is lower-cased unless a lowercase character follows it and
    // the run is longer than one character; in that case the run's final character
    // keeps its original casing.
    int lowerCasedCount = firstLowerIndex;
    if (firstLowerIndex < input.Length && firstLowerIndex > 1)
    {
        lowerCasedCount = firstLowerIndex - 1;
    }

    char[] result = input.ToCharArray();
    for (int position = 0; position < lowerCasedCount; position++)
    {
        result[position] = Char.ToLowerInvariant(result[position]);
    }

    return new String(result);
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,36 @@
|
|||
using System.Reflection;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
// General Information about an assembly is controlled through the following
|
||||
// set of attributes. Change these attribute values to modify the information
|
||||
// associated with an assembly.
|
||||
[assembly: AssemblyTitle("UnicodeTablesGenerator")]
|
||||
[assembly: AssemblyDescription("")]
|
||||
[assembly: AssemblyConfiguration("")]
|
||||
[assembly: AssemblyCompany("")]
|
||||
[assembly: AssemblyProduct("UnicodeTablesGenerator")]
|
||||
[assembly: AssemblyCopyright("Copyright © 2015")]
|
||||
[assembly: AssemblyTrademark("")]
|
||||
[assembly: AssemblyCulture("")]
|
||||
|
||||
// Setting ComVisible to false makes the types in this assembly not visible
|
||||
// to COM components. If you need to access a type in this assembly from
|
||||
// COM, set the ComVisible attribute to true on that type.
|
||||
[assembly: ComVisible(false)]
|
||||
|
||||
// The following GUID is for the ID of the typelib if this project is exposed to COM
|
||||
[assembly: Guid("c9286457-3d25-4143-9458-028aabedc4f5")]
|
||||
|
||||
// Version information for an assembly consists of the following four values:
|
||||
//
|
||||
// Major Version
|
||||
// Minor Version
|
||||
// Build Number
|
||||
// Revision
|
||||
//
|
||||
// You can specify all the values or you can default the Build and Revision Numbers
|
||||
// by using the '*' as shown below:
|
||||
// [assembly: AssemblyVersion("1.0.*")]
|
||||
[assembly: AssemblyVersion("1.0.0.0")]
|
||||
[assembly: AssemblyFileVersion("1.0.0.0")]
|
||||
|
|
@ -0,0 +1,61 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
|
||||
<PropertyGroup>
|
||||
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
|
||||
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
|
||||
<ProjectGuid>{3D181114-6946-4D34-A3B9-0F83B6B8FEAE}</ProjectGuid>
|
||||
<OutputType>Exe</OutputType>
|
||||
<AppDesignerFolder>Properties</AppDesignerFolder>
|
||||
<RootNamespace>UnicodeTablesGenerator</RootNamespace>
|
||||
<AssemblyName>UnicodeTablesGenerator</AssemblyName>
|
||||
<TargetFrameworkVersion>v4.5.1</TargetFrameworkVersion>
|
||||
<FileAlignment>512</FileAlignment>
|
||||
<AutoGenerateBindingRedirects>true</AutoGenerateBindingRedirects>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
|
||||
<PlatformTarget>AnyCPU</PlatformTarget>
|
||||
<DebugSymbols>true</DebugSymbols>
|
||||
<DebugType>full</DebugType>
|
||||
<Optimize>false</Optimize>
|
||||
<OutputPath>bin\Debug\</OutputPath>
|
||||
<DefineConstants>DEBUG;TRACE</DefineConstants>
|
||||
<ErrorReport>prompt</ErrorReport>
|
||||
<WarningLevel>4</WarningLevel>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
|
||||
<PlatformTarget>AnyCPU</PlatformTarget>
|
||||
<DebugType>pdbonly</DebugType>
|
||||
<Optimize>true</Optimize>
|
||||
<OutputPath>bin\Release\</OutputPath>
|
||||
<DefineConstants>TRACE</DefineConstants>
|
||||
<ErrorReport>prompt</ErrorReport>
|
||||
<WarningLevel>4</WarningLevel>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<Reference Include="System" />
|
||||
<Reference Include="System.Core" />
|
||||
<Reference Include="Microsoft.CSharp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Compile Include="Program.cs" />
|
||||
<Compile Include="Properties\AssemblyInfo.cs" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="App.config" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Content Include="..\..\Blocks.txt">
|
||||
<Link>Blocks.txt</Link>
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</Content>
|
||||
</ItemGroup>
|
||||
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
|
||||
<!-- To modify your build process, add your task inside one of the targets below and uncomment it.
|
||||
Other similar extension points exist, see Microsoft.Common.targets.
|
||||
<Target Name="BeforeBuild">
|
||||
</Target>
|
||||
<Target Name="AfterBuild">
|
||||
</Target>
|
||||
-->
|
||||
</Project>
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,47 @@
|
|||
The files Blocks.txt and UnicodeData.txt in this directory were
|
||||
retrieved from the following URLs on Saturday, February 7, 2015.
|
||||
|
||||
http://www.unicode.org/Public/7.0.0/ucd/Blocks.txt
|
||||
http://www.unicode.org/Public/7.0.0/ucd/UnicodeData.txt
|
||||
|
||||
The below copyright notice applies to these files.
|
||||
|
||||
========================================================================
|
||||
|
||||
COPYRIGHT AND PERMISSION NOTICE
|
||||
|
||||
Copyright © 1991-2015 Unicode, Inc. All rights reserved.
|
||||
Distributed under the Terms of Use in
|
||||
http://www.unicode.org/copyright.html.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of the Unicode data files and any associated documentation
|
||||
(the "Data Files") or Unicode software and any associated documentation
|
||||
(the "Software") to deal in the Data Files or Software
|
||||
without restriction, including without limitation the rights to use,
|
||||
copy, modify, merge, publish, distribute, and/or sell copies of
|
||||
the Data Files or Software, and to permit persons to whom the Data Files
|
||||
or Software are furnished to do so, provided that
|
||||
(a) this copyright and permission notice appear with all copies
|
||||
of the Data Files or Software,
|
||||
(b) this copyright and permission notice appear in associated
|
||||
documentation, and
|
||||
(c) there is clear notice in each modified Data File or in the Software
|
||||
as well as in the documentation associated with the Data File(s) or
|
||||
Software that the data or software has been modified.
|
||||
|
||||
THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
|
||||
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
|
||||
WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT OF THIRD PARTY RIGHTS.
|
||||
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
|
||||
NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
|
||||
DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
|
||||
DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
|
||||
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
|
||||
PERFORMANCE OF THE DATA FILES OR SOFTWARE.
|
||||
|
||||
Except as contained in this notice, the name of a copyright holder
|
||||
shall not be used in advertising or otherwise to promote the sale,
|
||||
use or other dealings in these Data Files or Software without prior
|
||||
written authorization of the copyright holder.
|
||||
Loading…
Reference in New Issue