Add unit tests and code generation routines

2015-02-27 11:06:31 -08:00 · 2015-02-27 11:06:31 -08:00 · 0ca24147a0
parent c5dc9abff6
commit 0ca24147a0
32 changed files with 30581 additions and 0 deletions
--- a/HttpAbstractions.sln
+++ b/HttpAbstractions.sln
@ -39,6 +39,8 @@ Project("{8BB2217D-0F2D-49D1-97BC-3654ED321F3B}") = "Microsoft.Net.Http.Headers.
 EndProject
 Project("{8BB2217D-0F2D-49D1-97BC-3654ED321F3B}") = "Microsoft.Framework.WebEncoders", "src\Microsoft.Framework.WebEncoders\Microsoft.Framework.WebEncoders.kproj", "{DD2CE416-765E-4000-A03E-C2FF165DA1B6}"
 EndProject
+Project("{8BB2217D-0F2D-49D1-97BC-3654ED321F3B}") = "Microsoft.Framework.WebEncoders.Tests", "test\Microsoft.Framework.WebEncoders.Tests\Microsoft.Framework.WebEncoders.Tests.kproj", "{7AE2731D-43CD-4CF8-850A-4914DE2CE930}"
+EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Debug|Any CPU = Debug|Any CPU
@ -215,6 +217,18 @@ Global
 		{DD2CE416-765E-4000-A03E-C2FF165DA1B6}.Release|Mixed Platforms.Build.0 = Release|Any CPU
 		{DD2CE416-765E-4000-A03E-C2FF165DA1B6}.Release|x86.ActiveCfg = Release|Any CPU
 		{DD2CE416-765E-4000-A03E-C2FF165DA1B6}.Release|x86.Build.0 = Release|Any CPU
+		{7AE2731D-43CD-4CF8-850A-4914DE2CE930}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{7AE2731D-43CD-4CF8-850A-4914DE2CE930}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{7AE2731D-43CD-4CF8-850A-4914DE2CE930}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU
+		{7AE2731D-43CD-4CF8-850A-4914DE2CE930}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU
+		{7AE2731D-43CD-4CF8-850A-4914DE2CE930}.Debug|x86.ActiveCfg = Debug|Any CPU
+		{7AE2731D-43CD-4CF8-850A-4914DE2CE930}.Debug|x86.Build.0 = Debug|Any CPU
+		{7AE2731D-43CD-4CF8-850A-4914DE2CE930}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{7AE2731D-43CD-4CF8-850A-4914DE2CE930}.Release|Any CPU.Build.0 = Release|Any CPU
+		{7AE2731D-43CD-4CF8-850A-4914DE2CE930}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU
+		{7AE2731D-43CD-4CF8-850A-4914DE2CE930}.Release|Mixed Platforms.Build.0 = Release|Any CPU
+		{7AE2731D-43CD-4CF8-850A-4914DE2CE930}.Release|x86.ActiveCfg = Release|Any CPU
+		{7AE2731D-43CD-4CF8-850A-4914DE2CE930}.Release|x86.Build.0 = Release|Any CPU
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE
@ -236,5 +250,6 @@ Global
 		{60AA2FDB-8121-4826-8D00-9A143FEFAF66} = {A5A15F1C-885A-452A-A731-B0173DDBD913}
 		{E6BB7AD1-BD10-4A23-B780-F4A86ADF00D1} = {F31FF137-390C-49BF-A3BD-7C6ED3597C21}
 		{DD2CE416-765E-4000-A03E-C2FF165DA1B6} = {A5A15F1C-885A-452A-A731-B0173DDBD913}
+		{7AE2731D-43CD-4CF8-850A-4914DE2CE930} = {F31FF137-390C-49BF-A3BD-7C6ED3597C21}
 	EndGlobalSection
 EndGlobal
--- a/src/Microsoft.Framework.WebEncoders/AllowedCharsBitmap.cs
+++ b/src/Microsoft.Framework.WebEncoders/AllowedCharsBitmap.cs
@ -25,6 +25,13 @@ namespace Microsoft.Framework.WebEncoders
            _allowedCharsBitmap[index] |= 0x1U << offset;
        }

+        // Marks all characters as forbidden (must be returned encoded) by zeroing the entire bitmap array.
+        public void Clear()
+        {
+            Array.Clear(_allowedCharsBitmap, 0, _allowedCharsBitmap.Length); // index 0 through Length - 1, i.e. every element
+        }
+
+        // Creates a deep copy of this bitmap
        public AllowedCharsBitmap Clone()
        {
            AllowedCharsBitmap retVal;
--- a/src/Microsoft.Framework.WebEncoders/CodePointFilter.cs
+++ b/src/Microsoft.Framework.WebEncoders/CodePointFilter.cs
@ -151,6 +151,18 @@ namespace Microsoft.Framework.WebEncoders
            return this;
        }

+        /// <summary>
+        /// Disallows all characters through the filter by clearing the underlying allowed-characters bitmap.
+        /// </summary>
+        /// <returns>
+        /// The 'this' instance, so that further calls can be chained onto the result.
+        /// </returns>
+        public CodePointFilter Clear()
+        {
+            _allowedCharsBitmap.Clear();
+            return this;
+        }
+
        /// <summary>
        /// Disallows all characters in the specified Unicode character block through the filter.
        /// </summary>
--- a/src/Microsoft.Framework.WebEncoders/Properties/AssemblyInfo.cs
+++ b/src/Microsoft.Framework.WebEncoders/Properties/AssemblyInfo.cs
@ -0,0 +1,7 @@
+// Copyright (c) Microsoft Open Technologies, Inc. All rights reserved.
+// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
+
+using System;
+using System.Runtime.CompilerServices;
+
+[assembly: InternalsVisibleTo("Microsoft.Framework.WebEncoders.Tests")]
--- a/test/Microsoft.Framework.WebEncoders.Tests/AllowedCharsBitmapTests.cs
+++ b/test/Microsoft.Framework.WebEncoders.Tests/AllowedCharsBitmapTests.cs
@ -0,0 +1,125 @@
+// Copyright (c) Microsoft Open Technologies, Inc. All rights reserved.
+// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
+
+using System;
+using Xunit;
+
+namespace Microsoft.Framework.WebEncoders
+{
+    public class AllowedCharsBitmapTests
+    {
+        [Fact]
+        public void Ctor_EmptyByDefault()
+        {
+            // Act: construct a bitmap and allow nothing
+            AllowedCharsBitmap freshBitmap = new AllowedCharsBitmap();
+
+            // Assert: every UTF-16 code unit, U+0000 through U+FFFF, is reported as forbidden
+            for (int codeUnit = Char.MinValue; codeUnit <= Char.MaxValue; codeUnit++)
+            {
+                Assert.False(freshBitmap.IsCharacterAllowed((char)codeUnit));
+            }
+        }
+
+        [Fact]
+        public void Allow_Forbid_ZigZag()
+        {
+            // Arrange: start from an empty bitmap
+            var zigZag = new AllowedCharsBitmap();
+
+            // Act: three passes applied in order -- allow multiples of 3, forbid multiples of 5, allow multiples of 7.
+            // Net effect: a char is allowed iff its code point is a multiple of 7, or a multiple of 3 but not of 5
+            // (multiples of 35 are forbidden by the second pass and then re-allowed by the third).
+            for (int multipleOf3 = 0; multipleOf3 <= Char.MaxValue; multipleOf3 += 3)
+            {
+                zigZag.AllowCharacter((char)multipleOf3);
+            }
+            for (int multipleOf5 = 0; multipleOf5 <= Char.MaxValue; multipleOf5 += 5)
+            {
+                zigZag.ForbidCharacter((char)multipleOf5);
+            }
+            for (int multipleOf7 = 0; multipleOf7 <= Char.MaxValue; multipleOf7 += 7)
+            {
+                zigZag.AllowCharacter((char)multipleOf7);
+            }
+
+            // Assert: recompute the expected state of each code unit from its divisibility alone
+            for (int codeUnit = 0; codeUnit <= Char.MaxValue; codeUnit++)
+            {
+                bool divisibleBy3 = (codeUnit % 3) == 0;
+                bool divisibleBy5 = (codeUnit % 5) == 0;
+                bool divisibleBy7 = (codeUnit % 7) == 0;
+                bool expected = divisibleBy7 || (divisibleBy3 && !divisibleBy5);
+                Assert.Equal(expected, zigZag.IsCharacterAllowed((char)codeUnit));
+            }
+        }
+
+        [Fact]
+        public void Clear_ForbidsEverything()
+        {
+            // Arrange: allow every code unit, starting at U+0000, so that Clear() has to reset each one
+            var bitmap = new AllowedCharsBitmap();
+            for (int i = 0; i <= Char.MaxValue; i++)
+            {
+                bitmap.AllowCharacter((char)i);
+            }
+
+            // Act
+            bitmap.Clear();
+
+            // Assert: nothing is allowed any more, including U+0000
+            for (int i = 0; i <= Char.MaxValue; i++)
+            {
+                Assert.False(bitmap.IsCharacterAllowed((char)i));
+            }
+        }
+
+        [Fact]
+        public void Clone_MakesDeepCopy()
+        {
+            // Arrange: the source bitmap allows only 'x'
+            var source = new AllowedCharsBitmap();
+            source.AllowCharacter('x');
+
+            // Act: clone, then mutate only the clone
+            var copy = source.Clone();
+            copy.AllowCharacter('y');
+
+            // Assert: the clone carries 'x' over, and its extra 'y' never shows up in the source
+            Assert.True(source.IsCharacterAllowed('x'));
+            Assert.False(source.IsCharacterAllowed('y'));
+            Assert.True(copy.IsCharacterAllowed('x'));
+            Assert.True(copy.IsCharacterAllowed('y'));
+        }
+
+        [Fact]
+        public void ForbidUndefinedCharacters_RemovesUndefinedChars()
+        {
+            // Arrange
+            // Only odd-numbered characters are allowed up front. If the call under test
+            // overwrote the bitmap instead of merging into it, even-numbered defined
+            // characters would become allowed and the assertions below would catch it.
+            var oddOnly = new AllowedCharsBitmap();
+            for (int odd = 1; odd <= Char.MaxValue; odd += 2)
+            {
+                oddOnly.AllowCharacter((char)odd);
+            }
+
+            // Act
+            oddOnly.ForbidUndefinedCharacters();
+
+            // Assert
+            for (int codeUnit = 0; codeUnit <= Char.MaxValue; codeUnit++)
+            {
+                char candidate = (char)codeUnit;
+                if ((codeUnit % 2) == 0)
+                {
+                    // even code units were never allowed in the first place
+                    Assert.False(oddOnly.IsCharacterAllowed(candidate));
+                    continue;
+                }
+                Assert.Equal(UnicodeHelpers.IsCharacterDefined(candidate), oddOnly.IsCharacterAllowed(candidate));
+            }
+        }
+    }
+}
--- a/test/Microsoft.Framework.WebEncoders.Tests/CodePointFilterTests.cs
+++ b/test/Microsoft.Framework.WebEncoders.Tests/CodePointFilterTests.cs
@ -0,0 +1,369 @@
+// Copyright (c) Microsoft Open Technologies, Inc. All rights reserved.
+// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using Xunit;
+
+namespace Microsoft.Framework.WebEncoders
+{
+    public class CodePointFilterTests
+    {
+        [Fact]
+        public void Ctor_Parameterless_DefaultsToBasicLatin()
+        {
+            // Act: the parameterless constructor is given no blocks at all
+            CodePointFilter defaultFilter = new CodePointFilter();
+
+            // Assert: exactly U+0000..U+007F is allowed, and nothing above it
+            for (int codeUnit = 0x0000; codeUnit < 0x0080; codeUnit++)
+            {
+                Assert.True(defaultFilter.IsCharacterAllowed((char)codeUnit));
+            }
+            for (int codeUnit = 0x0080; codeUnit <= Char.MaxValue; codeUnit++)
+            {
+                Assert.False(defaultFilter.IsCharacterAllowed((char)codeUnit));
+            }
+        }
+
+        [Fact]
+        public void Ctor_OtherCodePointFilterAsInterface()
+        {
+            // Arrange: a source filter that is not itself a CodePointFilter
+            OddCodePointFilter oddOnly = new OddCodePointFilter();
+
+            // Act: build a CodePointFilter from it
+            CodePointFilter built = new CodePointFilter(oddOnly);
+
+            // Assert: exactly the odd code units are allowed
+            for (int codeUnit = 0; codeUnit <= Char.MaxValue; codeUnit++)
+            {
+                Assert.Equal((codeUnit & 1) != 0, built.IsCharacterAllowed((char)codeUnit));
+            }
+        }
+
+        [Fact]
+        public void Ctor_OtherCodePointFilterAsConcreteType_Clones()
+        {
+            // Arrange: a concrete CodePointFilter that allows only 'x'
+            var source = new CodePointFilter(UnicodeBlocks.None).AllowChar('x');
+
+            // Act: construct from the source, then mutate only the new filter
+            var copy = new CodePointFilter(source).AllowChar('y');
+
+            // Assert: 'y' is visible on the copy but did not leak back into the source
+            Assert.True(source.IsCharacterAllowed('x'));
+            Assert.False(source.IsCharacterAllowed('y'));
+            Assert.True(copy.IsCharacterAllowed('x'));
+            Assert.True(copy.IsCharacterAllowed('y'));
+        }
+
+        [Fact]
+        public void Ctor_UnicodeBlocks()
+        {
+            // Act: construct directly from two non-adjacent blocks
+            var twoBlocks = new CodePointFilter(UnicodeBlocks.LatinExtendedA, UnicodeBlocks.LatinExtendedC);
+
+            // Assert: only U+0100..U+017F and U+2C60..U+2C7F are allowed; each gap around them is forbidden
+            for (int codeUnit = 0x0000; codeUnit <= 0x00FF; codeUnit++)
+            {
+                Assert.False(twoBlocks.IsCharacterAllowed((char)codeUnit));
+            }
+            for (int codeUnit = 0x0100; codeUnit < 0x0180; codeUnit++)
+            {
+                Assert.True(twoBlocks.IsCharacterAllowed((char)codeUnit));
+            }
+            for (int codeUnit = 0x0180; codeUnit <= 0x2C5F; codeUnit++)
+            {
+                Assert.False(twoBlocks.IsCharacterAllowed((char)codeUnit));
+            }
+            for (int codeUnit = 0x2C60; codeUnit < 0x2C80; codeUnit++)
+            {
+                Assert.True(twoBlocks.IsCharacterAllowed((char)codeUnit));
+            }
+            for (int codeUnit = 0x2C80; codeUnit <= Char.MaxValue; codeUnit++)
+            {
+                Assert.False(twoBlocks.IsCharacterAllowed((char)codeUnit));
+            }
+        }
+
+        [Fact]
+        public void AllowBlock()
+        {
+            // Arrange: a filter constructed from UnicodeBlocks.None
+            var target = new CodePointFilter(UnicodeBlocks.None);
+
+            // Act
+            var returned = target.AllowBlock(UnicodeBlocks.LatinExtendedA);
+
+            // Assert: fluent return value, and only U+0100..U+017F became allowed
+            Assert.Same(target, returned); // returns 'this' instance
+            for (int codeUnit = 0x0000; codeUnit <= 0x00FF; codeUnit++)
+            {
+                Assert.False(target.IsCharacterAllowed((char)codeUnit));
+            }
+            for (int codeUnit = 0x0100; codeUnit < 0x0180; codeUnit++)
+            {
+                Assert.True(target.IsCharacterAllowed((char)codeUnit));
+            }
+            for (int codeUnit = 0x0180; codeUnit <= Char.MaxValue; codeUnit++)
+            {
+                Assert.False(target.IsCharacterAllowed((char)codeUnit));
+            }
+        }
+
+        [Fact]
+        public void AllowBlocks()
+        {
+            // Arrange: a filter constructed from UnicodeBlocks.None
+            var target = new CodePointFilter(UnicodeBlocks.None);
+
+            // Act
+            var returned = target.AllowBlocks(UnicodeBlocks.LatinExtendedA, UnicodeBlocks.LatinExtendedC);
+
+            // Assert: fluent return value, and only U+0100..U+017F and U+2C60..U+2C7F became allowed
+            Assert.Same(target, returned); // returns 'this' instance
+            for (int codeUnit = 0x0000; codeUnit <= 0x00FF; codeUnit++)
+            {
+                Assert.False(target.IsCharacterAllowed((char)codeUnit));
+            }
+            for (int codeUnit = 0x0100; codeUnit < 0x0180; codeUnit++)
+            {
+                Assert.True(target.IsCharacterAllowed((char)codeUnit));
+            }
+            for (int codeUnit = 0x0180; codeUnit <= 0x2C5F; codeUnit++)
+            {
+                Assert.False(target.IsCharacterAllowed((char)codeUnit));
+            }
+            for (int codeUnit = 0x2C60; codeUnit < 0x2C80; codeUnit++)
+            {
+                Assert.True(target.IsCharacterAllowed((char)codeUnit));
+            }
+            for (int codeUnit = 0x2C80; codeUnit <= Char.MaxValue; codeUnit++)
+            {
+                Assert.False(target.IsCharacterAllowed((char)codeUnit));
+            }
+        }
+
+        [Fact]
+        public void AllowChar()
+        {
+            // Arrange: default filter
+            var target = new CodePointFilter();
+
+            // Act
+            var returned = target.AllowChar('\u0100');
+
+            // Assert: fluent return value; U+0100 is now allowed while its neighbour U+0101 is not
+            Assert.Same(target, returned); // returns 'this' instance
+            Assert.True(target.IsCharacterAllowed('\u0100'));
+            Assert.False(target.IsCharacterAllowed('\u0101'));
+        }
+
+        [Fact]
+        public void AllowChars_Array()
+        {
+            // Arrange: default filter
+            var target = new CodePointFilter();
+
+            // Act: pass the characters as individual char arguments
+            var returned = target.AllowChars('\u0100', '\u0102');
+
+            // Assert: fluent return value; only the two listed chars became allowed, not the ones after each
+            Assert.Same(target, returned); // returns 'this' instance
+            Assert.True(target.IsCharacterAllowed('\u0100'));
+            Assert.False(target.IsCharacterAllowed('\u0101'));
+            Assert.True(target.IsCharacterAllowed('\u0102'));
+            Assert.False(target.IsCharacterAllowed('\u0103'));
+        }
+
+        [Fact]
+        public void AllowChars_String()
+        {
+            // Arrange: default filter
+            var target = new CodePointFilter();
+
+            // Act: pass the characters packed into a single string
+            var returned = target.AllowChars("\u0100\u0102");
+
+            // Assert: fluent return value; only the two chars in the string became allowed, not the ones after each
+            Assert.Same(target, returned); // returns 'this' instance
+            Assert.True(target.IsCharacterAllowed('\u0100'));
+            Assert.False(target.IsCharacterAllowed('\u0101'));
+            Assert.True(target.IsCharacterAllowed('\u0102'));
+            Assert.False(target.IsCharacterAllowed('\u0103'));
+        }
+
+        [Fact]
+        public void AllowFilter()
+        {
+            // Arrange: begin with all of Basic Latin allowed
+            var target = new CodePointFilter(UnicodeBlocks.BasicLatin);
+
+            // Act: additionally allow whatever the odd-only filter allows
+            var returned = target.AllowFilter(new OddCodePointFilter());
+
+            // Assert: Basic Latin stays fully allowed (even code units included); above it only odd code units are allowed
+            Assert.Same(target, returned); // returns 'this' instance
+            for (int codeUnit = 0x0000; codeUnit < 0x0080; codeUnit++)
+            {
+                Assert.True(target.IsCharacterAllowed((char)codeUnit));
+            }
+            for (int codeUnit = 0x0080; codeUnit <= Char.MaxValue; codeUnit++)
+            {
+                Assert.Equal((codeUnit & 1) != 0, target.IsCharacterAllowed((char)codeUnit));
+            }
+        }
+
+        [Fact]
+        public void Clear()
+        {
+            // Arrange: allow U+0001 upward; U+0000 is already allowed because the default filter is Basic Latin
+            var target = new CodePointFilter();
+            for (int codeUnit = 1; codeUnit <= Char.MaxValue; codeUnit++)
+            {
+                target.AllowChar((char)codeUnit);
+            }
+
+            // Act
+            var returned = target.Clear();
+
+            // Assert: fluent return value, and not a single code unit remains allowed
+            Assert.Same(target, returned); // returns 'this' instance
+            for (int codeUnit = Char.MinValue; codeUnit <= Char.MaxValue; codeUnit++)
+            {
+                Assert.False(target.IsCharacterAllowed((char)codeUnit));
+            }
+        }
+
+        [Fact]
+        public void ForbidBlock()
+        {
+            // Arrange: begin with every odd code unit allowed
+            var target = new CodePointFilter(new OddCodePointFilter());
+
+            // Act
+            var returned = target.ForbidBlock(UnicodeBlocks.Specials);
+
+            // Assert: fluent return value; the odd/even pattern holds below U+FFF0, and U+FFF0..U+FFFF is forbidden
+            Assert.Same(target, returned); // returns 'this' instance
+            for (int codeUnit = 0x0000; codeUnit < 0xFFF0; codeUnit++)
+            {
+                Assert.Equal((codeUnit & 1) != 0, target.IsCharacterAllowed((char)codeUnit));
+            }
+            for (int codeUnit = 0xFFF0; codeUnit <= Char.MaxValue; codeUnit++)
+            {
+                Assert.False(target.IsCharacterAllowed((char)codeUnit));
+            }
+        }
+
+        [Fact]
+        public void ForbidBlocks()
+        {
+            // Arrange: begin with every odd code unit allowed
+            var target = new CodePointFilter(new OddCodePointFilter());
+
+            // Act
+            var returned = target.ForbidBlocks(UnicodeBlocks.BasicLatin, UnicodeBlocks.Specials);
+
+            // Assert: fluent return value; both ends are forbidden and the odd/even pattern holds in between
+            Assert.Same(target, returned); // returns 'this' instance
+            for (int codeUnit = 0x0000; codeUnit < 0x0080; codeUnit++)
+            {
+                Assert.False(target.IsCharacterAllowed((char)codeUnit));
+            }
+            for (int codeUnit = 0x0080; codeUnit < 0xFFF0; codeUnit++)
+            {
+                Assert.Equal((codeUnit & 1) != 0, target.IsCharacterAllowed((char)codeUnit));
+            }
+            for (int codeUnit = 0xFFF0; codeUnit <= Char.MaxValue; codeUnit++)
+            {
+                Assert.False(target.IsCharacterAllowed((char)codeUnit));
+            }
+        }
+
+        [Fact]
+        public void ForbidChar()
+        {
+            // Arrange: begin with all of Basic Latin allowed
+            var target = new CodePointFilter(UnicodeBlocks.BasicLatin);
+
+            // Act
+            var returned = target.ForbidChar('x');
+
+            // Assert: fluent return value; only 'x' was removed, its neighbours are untouched
+            Assert.Same(target, returned); // returns 'this' instance
+            Assert.True(target.IsCharacterAllowed('w'));
+            Assert.False(target.IsCharacterAllowed('x'));
+            Assert.True(target.IsCharacterAllowed('y'));
+            Assert.True(target.IsCharacterAllowed('z'));
+        }
+
+        [Fact]
+        public void ForbidChars_Array()
+        {
+            // Arrange: begin with all of Basic Latin allowed
+            var target = new CodePointFilter(UnicodeBlocks.BasicLatin);
+
+            // Act: pass the characters as individual char arguments
+            var returned = target.ForbidChars('x', 'z');
+
+            // Assert: fluent return value; 'x' and 'z' were removed while 'w' and 'y' stay allowed
+            Assert.Same(target, returned); // returns 'this' instance
+            Assert.True(target.IsCharacterAllowed('w'));
+            Assert.False(target.IsCharacterAllowed('x'));
+            Assert.True(target.IsCharacterAllowed('y'));
+            Assert.False(target.IsCharacterAllowed('z'));
+        }
+
+        [Fact]
+        public void ForbidChars_String()
+        {
+            // Arrange: begin with all of Basic Latin allowed
+            var target = new CodePointFilter(UnicodeBlocks.BasicLatin);
+
+            // Act: pass the characters packed into a single string
+            var returned = target.ForbidChars("xz");
+
+            // Assert: fluent return value; 'x' and 'z' were removed while 'w' and 'y' stay allowed
+            Assert.Same(target, returned); // returns 'this' instance
+            Assert.True(target.IsCharacterAllowed('w'));
+            Assert.False(target.IsCharacterAllowed('x'));
+            Assert.True(target.IsCharacterAllowed('y'));
+            Assert.False(target.IsCharacterAllowed('z'));
+        }
+
+        [Fact]
+        public void GetAllowedCodePoints()
+        {
+            // Arrange: expected = every code point of Basic Latin and Specials, minus 'x', in ascending order
+            var basicLatinPoints = Enumerable.Range(UnicodeBlocks.BasicLatin.FirstCodePoint, UnicodeBlocks.BasicLatin.BlockSize);
+            var specialsPoints = Enumerable.Range(UnicodeBlocks.Specials.FirstCodePoint, UnicodeBlocks.Specials.BlockSize);
+            var expected = basicLatinPoints.Concat(specialsPoints)
+                .Where(codePoint => codePoint != 'x')
+                .OrderBy(codePoint => codePoint).ToArray();
+
+            var target = new CodePointFilter(UnicodeBlocks.BasicLatin, UnicodeBlocks.Specials);
+            target.ForbidChar('x');
+
+            // Act: sort the result so the comparison does not depend on enumeration order
+            var actual = target.GetAllowedCodePoints().OrderBy(codePoint => codePoint).ToArray();
+
+            // Assert
+            Assert.Equal<int>(expected, actual);
+        }
+
+        // Test double: an ICodePointFilter whose allowed set is exactly the odd code points 1, 3, ..., 0xFFFF
+        private sealed class OddCodePointFilter : ICodePointFilter
+        {
+            public IEnumerable<int> GetAllowedCodePoints()
+            {
+                for (int oddCodePoint = 1; oddCodePoint <= char.MaxValue; oddCodePoint += 2)
+                {
+                    yield return oddCodePoint;
+                }
+            }
+        }
+    }
+}
--- a/test/Microsoft.Framework.WebEncoders.Tests/EncoderCommonTests.cs
+++ b/test/Microsoft.Framework.WebEncoders.Tests/EncoderCommonTests.cs
@ -0,0 +1,21 @@
+// Copyright (c) Microsoft Open Technologies, Inc. All rights reserved.
+// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
+
+using System;
+using Xunit;
+
+namespace Microsoft.Framework.WebEncoders
+{
+    public class EncoderCommonTests
+    {
+        [Theory]
+        [InlineData(5000, 3, 15000)] // 5000 * 3 stays under the 16k cap, so the full worst case is used
+        [InlineData(10000, 3, 16 * 1024)] // 10000 * 3 exceeds the cap, so the result is clamped to 16k chars
+        [InlineData(512, int.MaxValue, 16 * 1024)] // the multiplication would overflow; the result is still the 16k cap
+        [InlineData(40000, 3, 40000)] // the input alone is longer than the cap, so the result equals the input length
+        public void GetCapacityOfOutputStringBuilder(int numCharsToEncode, int worstCaseOutputCharsPerInputChar, int expectedResult)
+        {
+            Assert.Equal(expectedResult, EncoderCommon.GetCapacityOfOutputStringBuilder(numCharsToEncode, worstCaseOutputCharsPerInputChar));
+        }
+    }
+}
--- a/test/Microsoft.Framework.WebEncoders.Tests/EncoderExtensionsTests.cs
+++ b/test/Microsoft.Framework.WebEncoders.Tests/EncoderExtensionsTests.cs
@ -0,0 +1,72 @@
+// Copyright (c) Microsoft Open Technologies, Inc. All rights reserved.
+// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
+
+using System;
+using System.IO;
+using Xunit;
+
+namespace Microsoft.Framework.WebEncoders
+{
+    public class EncoderExtensionsTests
+    {
+        [Fact]
+        public void HtmlEncode_ParameterChecks()
+        {
+            Assert.Throws<ArgumentNullException>(() => { EncoderExtensions.HtmlEncode(null, "Hello!", new StringWriter()); }); // null encoder
+        }
+
+        [Fact]
+        public void HtmlEncode_PositiveTestCase()
+        {
+            // Arrange: keep the encoder typed as the interface so the call below goes through it
+            IHtmlEncoder htmlEncoder = new HtmlEncoder(UnicodeBlocks.All);
+            var output = new StringWriter();
+
+            // Act
+            htmlEncoder.HtmlEncode("Hello+there!", output);
+
+            // Assert: '+' comes out as a hex character reference; the rest is passed through
+            Assert.Equal("Hello&#x2B;there!", output.ToString());
+        }
+
+        [Fact]
+        public void JavaScriptStringEncode_ParameterChecks()
+        {
+            Assert.Throws<ArgumentNullException>(() => { EncoderExtensions.JavaScriptStringEncode(null, "Hello!", new StringWriter()); }); // null encoder
+        }
+
+        [Fact]
+        public void JavaScriptStringEncode_PositiveTestCase()
+        {
+            // Arrange: keep the encoder typed as the interface so the call below goes through it
+            IJavaScriptStringEncoder jsEncoder = new JavaScriptStringEncoder(UnicodeBlocks.All);
+            var output = new StringWriter();
+
+            // Act
+            jsEncoder.JavaScriptStringEncode("Hello+there!", output);
+
+            // Assert: '+' comes out as a \u escape; the rest is passed through
+            Assert.Equal(@"Hello\u002Bthere!", output.ToString());
+        }
+
+        [Fact]
+        public void UrlEncode_ParameterChecks()
+        {
+            Assert.Throws<ArgumentNullException>(() => { EncoderExtensions.UrlEncode(null, "Hello!", new StringWriter()); }); // null encoder
+        }
+
+        [Fact]
+        public void UrlEncode_PositiveTestCase()
+        {
+            // Arrange: keep the encoder typed as the interface so the call below goes through it
+            IUrlEncoder urlEncoder = new UrlEncoder(UnicodeBlocks.All);
+            var output = new StringWriter();
+
+            // Act
+            urlEncoder.UrlEncode("Hello+there!", output);
+
+            // Assert: '+' comes out percent-encoded; the rest is passed through
+            Assert.Equal("Hello%2Bthere!", output.ToString());
+        }
+    }
+}
--- a/test/Microsoft.Framework.WebEncoders.Tests/Entities.cs
+++ b/test/Microsoft.Framework.WebEncoders.Tests/Entities.cs
@ -0,0 +1,38 @@
+// Copyright (c) Microsoft Open Technologies, Inc. All rights reserved.
+// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
+
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using Newtonsoft.Json;
+
+namespace Microsoft.Framework.WebEncoders
+{
+    internal static class Entities
+    {
+        public static readonly IDictionary<string, ParsedEntity> ParsedEntities = GetParsedEntities();
+
+        private static IDictionary<string, ParsedEntity> GetParsedEntities()
+        {
+            // deserialize the complete entity table from the JSON text
+            var jsonReader = new JsonTextReader(new StringReader(ReadEntitiesJsonFile()));
+            var entities = new JsonSerializer().Deserialize<IDictionary<string, ParsedEntity>>(jsonReader);
+
+            // drop every name not of the form "&entity;" (keys are snapshotted first because removal mutates the dictionary)
+            var malformedNames = entities.Keys
+                .Where(name => !(name.StartsWith("&", StringComparison.Ordinal) && name.EndsWith(";", StringComparison.Ordinal)))
+                .ToArray();
+            foreach (var name in malformedNames)
+            {
+                entities.Remove(name);
+            }
+            return entities;
+        }
+
+        private static string ReadEntitiesJsonFile()
+        {
+            return File.ReadAllText("entities.json"); // relative path: resolved against the current working directory
+        }
+    }
+}
--- a/test/Microsoft.Framework.WebEncoders.Tests/Extensions.cs
+++ b/test/Microsoft.Framework.WebEncoders.Tests/Extensions.cs
@ -0,0 +1,27 @@
+// Copyright (c) Microsoft Open Technologies, Inc. All rights reserved.
+// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
+
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+
+namespace Microsoft.Framework.WebEncoders
+{
+    public static class Extensions
+    {
+        public static string[] ReadAllLines(this TextReader reader) // eager: drains the reader into an array
+        {
+            return ReadAllLinesImpl(reader).ToArray();
+        }
+
+        // Lazy iterator behind ReadAllLines: yields each line until ReadLine() returns null (end of input).
+        private static IEnumerable<string> ReadAllLinesImpl(TextReader reader)
+        {
+            for (string current = reader.ReadLine(); current != null; current = reader.ReadLine())
+            {
+                yield return current;
+            }
+        }
+    }
+}
--- a/test/Microsoft.Framework.WebEncoders.Tests/HtmlEncoderTests.cs
+++ b/test/Microsoft.Framework.WebEncoders.Tests/HtmlEncoderTests.cs
@ -0,0 +1,269 @@
+// Copyright (c) Microsoft Open Technologies, Inc. All rights reserved.
+// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
+
+using System;
+using System.Globalization;
+using System.IO;
+using Xunit;
+
+namespace Microsoft.Framework.WebEncoders
+{
+    public class HtmlEncoderTests
+    {
+        [Fact]
+        public void Ctor_WithCodePointFilter()
+        {
+            // Arrange: an otherwise empty filter that explicitly allows a handful of characters
+            var encoder = new HtmlEncoder(
+                new CodePointFilter(UnicodeBlocks.None)
+                    .AllowChars("ab")
+                    .AllowChars('\0', '&', '\uFFFF', 'd'));
+
+            var cases = new[]
+            {
+                new { Input = "a", Expected = "a" },
+                new { Input = "b", Expected = "b" },
+                new { Input = "c", Expected = "&#x63;" },
+                new { Input = "d", Expected = "d" },
+                new { Input = "\0", Expected = "&#x0;" }, // we still always encode control chars
+                new { Input = "&", Expected = "&amp;" }, // we still always encode HTML-special chars
+                new { Input = "\uFFFF", Expected = "&#xFFFF;" }, // we still always encode non-chars and other forbidden chars
+            };
+
+            // Act & assert
+            foreach (var testCase in cases)
+            {
+                Assert.Equal(testCase.Expected, encoder.HtmlEncode(testCase.Input));
+            }
+        }
+
+        [Fact]
+        public void Ctor_WithUnicodeBlocks()
+        {
+            // Arrange
+            var encoder = new HtmlEncoder(UnicodeBlocks.Latin1Supplement, UnicodeBlocks.MiscellaneousSymbols);
+            string smallEWithAcute = "\u00E9"; // LATIN SMALL LETTER E WITH ACUTE
+            string cloud = "\u2601"; // CLOUD
+
+            // Act
+            string encodedLetterA = encoder.HtmlEncode("a");
+            string encodedSmallEWithAcute = encoder.HtmlEncode(smallEWithAcute);
+            string encodedCloud = encoder.HtmlEncode(cloud);
+
+            // Assert: only characters from the two allowed blocks pass through unescaped
+            Assert.Equal("&#x61;", encodedLetterA);
+            Assert.Equal("\u00E9", encodedSmallEWithAcute);
+            Assert.Equal("\u2601", encodedCloud);
+        }
+
+        [Fact]
+        public void Ctor_WithNoParameters_DefaultsToBasicLatin()
+        {
+            // Arrange
+            var encoder = new HtmlEncoder();
+
+            // Act
+            string encodedLetterA = encoder.HtmlEncode("a");
+            string encodedSmallEWithAcute = encoder.HtmlEncode("\u00E9"); // LATIN SMALL LETTER E WITH ACUTE
+            string encodedCloud = encoder.HtmlEncode("\u2601"); // CLOUD
+
+            // Assert: the Basic Latin letter is untouched, everything else is escaped
+            Assert.Equal("a", encodedLetterA);
+            Assert.Equal("&#xE9;", encodedSmallEWithAcute);
+            Assert.Equal("&#x2601;", encodedCloud);
+        }
+
+        [Fact]
+        public void Default_EquivalentToBasicLatin()
+        {
+            // Arrange
+            var basicLatinEncoder = new HtmlEncoder(UnicodeBlocks.BasicLatin);
+            var defaultEncoder = HtmlEncoder.Default;
+
+            // Act & assert: both encoders must agree on every non-surrogate UTF-16 code unit
+            for (int codeUnit = Char.MinValue; codeUnit <= Char.MaxValue; codeUnit++)
+            {
+                if (IsSurrogateCodePoint(codeUnit))
+                {
+                    continue;
+                }
+
+                string text = ((char)codeUnit).ToString();
+                string expected = basicLatinEncoder.HtmlEncode(text);
+                string actual = defaultEncoder.HtmlEncode(text);
+                Assert.Equal(expected, actual);
+            }
+        }
+
+        [Fact]
+        public void Default_ReturnsSingletonInstance()
+        {
+            // Act: read the property twice
+            var firstRead = HtmlEncoder.Default;
+            var secondRead = HtmlEncoder.Default;
+
+            // Assert: both reads yield the very same object
+            Assert.Same(firstRead, secondRead);
+        }
+
+        [Theory]
+        [InlineData("<", "&lt;")]
+        [InlineData(">", "&gt;")]
+        [InlineData("&", "&amp;")]
+        [InlineData("'", "&#x27;")]
+        [InlineData("\"", "&quot;")]
+        [InlineData("+", "&#x2B;")]
+        public void HtmlEncode_AllRangesAllowed_StillEncodesForbiddenChars_Simple(string input, string expected)
+        {
+            // Arrange: every block is allowed, so any escaping seen below is unconditional
+            var permissiveEncoder = new HtmlEncoder(UnicodeBlocks.All);
+
+            // Act
+            string actual = permissiveEncoder.HtmlEncode(input);
+
+            // Assert
+            Assert.Equal(expected, actual);
+        }
+
+        [Fact]
+        public void HtmlEncode_AllRangesAllowed_StillEncodesForbiddenChars_Extended()
+        {
+            // Arrange
+            var encoder = new HtmlEncoder(UnicodeBlocks.All);
+
+            // Act & assert - BMP chars, each encoded on its own
+            for (int codePoint = 0; codePoint <= 0xFFFF; codePoint++)
+            {
+                string input = ((char)codePoint).ToString();
+                string expected;
+
+                if (IsSurrogateCodePoint(codePoint))
+                {
+                    // unpaired surrogate -> Unicode replacement char
+                    expected = "\uFFFD";
+                }
+                else
+                {
+                    switch (codePoint)
+                    {
+                        // characters with a named entity
+                        case '<':
+                            expected = "&lt;";
+                            break;
+                        case '>':
+                            expected = "&gt;";
+                            break;
+                        case '&':
+                            expected = "&amp;";
+                            break;
+                        case '"':
+                            expected = "&quot;";
+                            break;
+
+                        default:
+                            bool mustEncode =
+                                codePoint == '\'' || codePoint == '+' // apostrophe, plus
+                                || codePoint <= 0x001F || (0x007F <= codePoint && codePoint <= 0x9F) // control char
+                                || !UnicodeHelpers.IsCharacterDefined((char)codePoint); // undefined (or otherwise disallowed) char
+
+                            expected = mustEncode
+                                ? String.Format(CultureInfo.InvariantCulture, "&#x{0:X};", codePoint)
+                                : input; // no encoding
+                            break;
+                    }
+                }
+
+                Assert.Equal(expected, encoder.HtmlEncode(input));
+            }
+
+            // Act & assert - astral chars are always written as a hex character reference
+            for (int codePoint = 0x10000; codePoint <= 0x10FFFF; codePoint++)
+            {
+                string expected = String.Format(CultureInfo.InvariantCulture, "&#x{0:X};", codePoint);
+                string actual = encoder.HtmlEncode(Char.ConvertFromUtf32(codePoint));
+                Assert.Equal(expected, actual);
+            }
+        }
+
+        [Fact]
+        public void HtmlEncode_BadSurrogates_ReturnsUnicodeReplacementChar()
+        {
+            // Arrange
+            // Input layout:
+            // "a<unpaired leading>b<unpaired trailing>c<trailing before leading>d<unpaired trailing><valid>e<high at end of string>"
+            const string input = "a\uD800b\uDFFFc\uDFFF\uD800d\uDFFF\uD800\uDFFFe\uD800";
+            const string expected = "a\uFFFDb\uFFFDc\uFFFD\uFFFDd\uFFFD&#x103FF;e\uFFFD";
+            var permissiveEncoder = new HtmlEncoder(UnicodeBlocks.All); // allow all codepoints
+
+            // Act
+            string actual = permissiveEncoder.HtmlEncode(input);
+
+            // Assert
+            Assert.Equal(expected, actual);
+        }
+
+        [Fact]
+        public void HtmlEncode_EmptyStringInput_ReturnsEmptyString()
+        {
+            // Act
+            string encoded = new HtmlEncoder().HtmlEncode("");
+
+            // Assert
+            Assert.Equal("", encoded);
+        }
+
+        [Fact]
+        public void HtmlEncode_InputDoesNotRequireEncoding_ReturnsOriginalStringInstance()
+        {
+            // Arrange
+            string original = "Hello, there!";
+            var encoder = new HtmlEncoder();
+
+            // Act
+            string encoded = encoder.HtmlEncode(original);
+
+            // Assert: reference equality, not just equal content
+            Assert.Same(original, encoded);
+        }
+
+        [Fact]
+        public void HtmlEncode_NullInput_ReturnsNull()
+        {
+            // Act
+            string encoded = new HtmlEncoder().HtmlEncode(null);
+
+            // Assert
+            Assert.Null(encoded);
+        }
+
+        [Fact]
+        public void HtmlEncode_WithCharsRequiringEncodingAtBeginning()
+        {
+            // Arrange
+            var encoder = new HtmlEncoder();
+
+            // Act
+            string encoded = encoder.HtmlEncode("&Hello, there!");
+
+            // Assert
+            Assert.Equal("&amp;Hello, there!", encoded);
+        }
+
+        [Fact]
+        public void HtmlEncode_WithCharsRequiringEncodingAtEnd()
+        {
+            // Arrange
+            var encoder = new HtmlEncoder();
+
+            // Act
+            string encoded = encoder.HtmlEncode("Hello, there!&");
+
+            // Assert
+            Assert.Equal("Hello, there!&amp;", encoded);
+        }
+
+        [Fact]
+        public void HtmlEncode_WithCharsRequiringEncodingInMiddle()
+        {
+            // Arrange
+            var encoder = new HtmlEncoder();
+
+            // Act
+            string encoded = encoder.HtmlEncode("Hello, &there!");
+
+            // Assert
+            Assert.Equal("Hello, &amp;there!", encoded);
+        }
+
+        [Fact]
+        public void HtmlEncode_WithCharsRequiringEncodingInterspersed()
+        {
+            // Arrange
+            var encoder = new HtmlEncoder();
+
+            // Act
+            string encoded = encoder.HtmlEncode("Hello, <there>!");
+
+            // Assert
+            Assert.Equal("Hello, &lt;there&gt;!", encoded);
+        }
+
+        [Fact]
+        public void HtmlEncode_CharArray()
+        {
+            // Arrange
+            var encoder = new HtmlEncoder();
+            var writer = new StringWriter();
+            char[] source = "Hello+world!".ToCharArray();
+
+            // Act: encode only the five characters starting at index 3
+            encoder.HtmlEncode(source, 3, 5, writer);
+            string written = writer.ToString();
+
+            // Assert
+            Assert.Equal("lo&#x2B;wo", written);
+        }
+
+        [Fact]
+        public void HtmlEncode_StringSubstring()
+        {
+            // Arrange
+            var encoder = new HtmlEncoder();
+            var writer = new StringWriter();
+
+            // Act: encode only the five characters starting at index 3
+            encoder.HtmlEncode("Hello+world!", 3, 5, writer);
+            string written = writer.ToString();
+
+            // Assert
+            Assert.Equal("lo&#x2B;wo", written);
+        }
+
+        // True when the value lies in the UTF-16 surrogate range U+D800..U+DFFF (inclusive).
+        private static bool IsSurrogateCodePoint(int codePoint)
+        {
+            const int firstSurrogate = 0xD800;
+            const int lastSurrogate = 0xDFFF;
+            return !(codePoint < firstSurrogate || codePoint > lastSurrogate);
+        }
+    }
+}
--- a/test/Microsoft.Framework.WebEncoders.Tests/JavaScriptStringEncoderTests.cs
+++ b/test/Microsoft.Framework.WebEncoders.Tests/JavaScriptStringEncoderTests.cs
@ -0,0 +1,331 @@
+// Copyright (c) Microsoft Open Technologies, Inc. All rights reserved.
+// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
+
+using System;
+using System.Globalization;
+using System.IO;
+using Xunit;
+
+namespace Microsoft.Framework.WebEncoders
+{
+    public class JavaScriptStringEncoderTests
+    {
+        [Fact]
+        public void Ctor_WithCodePointFilter()
+        {
+            // Arrange: an otherwise empty filter that explicitly allows a handful of characters
+            var encoder = new JavaScriptStringEncoder(
+                new CodePointFilter(UnicodeBlocks.None)
+                    .AllowChars("ab")
+                    .AllowChars('\0', '&', '\uFFFF', 'd'));
+
+            var cases = new[]
+            {
+                new { Input = "a", Expected = "a" },
+                new { Input = "b", Expected = "b" },
+                new { Input = "c", Expected = @"\u0063" },
+                new { Input = "d", Expected = "d" },
+                new { Input = "\0", Expected = @"\u0000" }, // we still always encode control chars
+                new { Input = "&", Expected = @"\u0026" }, // we still always encode HTML-special chars
+                new { Input = "\uFFFF", Expected = @"\uFFFF" }, // we still always encode non-chars and other forbidden chars
+            };
+
+            // Act & assert
+            foreach (var testCase in cases)
+            {
+                Assert.Equal(testCase.Expected, encoder.JavaScriptStringEncode(testCase.Input));
+            }
+        }
+
+        [Fact]
+        public void Ctor_WithUnicodeBlocks()
+        {
+            // Arrange
+            var encoder = new JavaScriptStringEncoder(UnicodeBlocks.Latin1Supplement, UnicodeBlocks.MiscellaneousSymbols);
+            string smallEWithAcute = "\u00E9"; // LATIN SMALL LETTER E WITH ACUTE
+            string cloud = "\u2601"; // CLOUD
+
+            // Act
+            string encodedLetterA = encoder.JavaScriptStringEncode("a");
+            string encodedSmallEWithAcute = encoder.JavaScriptStringEncode(smallEWithAcute);
+            string encodedCloud = encoder.JavaScriptStringEncode(cloud);
+
+            // Assert: only characters from the two allowed blocks pass through unescaped
+            Assert.Equal(@"\u0061", encodedLetterA);
+            Assert.Equal("\u00E9", encodedSmallEWithAcute);
+            Assert.Equal("\u2601", encodedCloud);
+        }
+
+        [Fact]
+        public void Ctor_WithNoParameters_DefaultsToBasicLatin()
+        {
+            // Arrange
+            var encoder = new JavaScriptStringEncoder();
+
+            // Act
+            string encodedLetterA = encoder.JavaScriptStringEncode("a");
+            string encodedSmallEWithAcute = encoder.JavaScriptStringEncode("\u00E9"); // LATIN SMALL LETTER E WITH ACUTE
+            string encodedCloud = encoder.JavaScriptStringEncode("\u2601"); // CLOUD
+
+            // Assert: the Basic Latin letter is untouched, everything else is escaped
+            Assert.Equal("a", encodedLetterA);
+            Assert.Equal(@"\u00E9", encodedSmallEWithAcute);
+            Assert.Equal(@"\u2601", encodedCloud);
+        }
+
+        [Fact]
+        public void Default_EquivalentToBasicLatin()
+        {
+            // Arrange
+            var basicLatinEncoder = new JavaScriptStringEncoder(UnicodeBlocks.BasicLatin);
+            var defaultEncoder = JavaScriptStringEncoder.Default;
+
+            // Act & assert: both encoders must agree on every non-surrogate UTF-16 code unit
+            for (int codeUnit = Char.MinValue; codeUnit <= Char.MaxValue; codeUnit++)
+            {
+                if (IsSurrogateCodePoint(codeUnit))
+                {
+                    continue;
+                }
+
+                string text = ((char)codeUnit).ToString();
+                string expected = basicLatinEncoder.JavaScriptStringEncode(text);
+                string actual = defaultEncoder.JavaScriptStringEncode(text);
+                Assert.Equal(expected, actual);
+            }
+        }
+
+        [Fact]
+        public void Default_ReturnsSingletonInstance()
+        {
+            // Act: read the property twice
+            var firstRead = JavaScriptStringEncoder.Default;
+            var secondRead = JavaScriptStringEncoder.Default;
+
+            // Assert: both reads yield the very same object
+            Assert.Same(firstRead, secondRead);
+        }
+
+        [Theory]
+        [InlineData("<", @"\u003C")]
+        [InlineData(">", @"\u003E")]
+        [InlineData("&", @"\u0026")]
+        [InlineData("'", @"\u0027")]
+        [InlineData("\"", @"\u0022")]
+        [InlineData("+", @"\u002B")]
+        [InlineData("\\", @"\\")]
+        [InlineData("/", @"\/")]
+        [InlineData("\b", @"\b")]
+        [InlineData("\f", @"\f")]
+        [InlineData("\n", @"\n")]
+        [InlineData("\t", @"\t")]
+        [InlineData("\r", @"\r")]
+        public void JavaScriptStringEncode_AllRangesAllowed_StillEncodesForbiddenChars_Simple(string input, string expected)
+        {
+            // Arrange: every block is allowed, so any escaping seen below is unconditional
+            var permissiveEncoder = new JavaScriptStringEncoder(UnicodeBlocks.All);
+
+            // Act
+            string actual = permissiveEncoder.JavaScriptStringEncode(input);
+
+            // Assert
+            Assert.Equal(expected, actual);
+        }
+
+        [Fact]
+        public void JavaScriptStringEncode_AllRangesAllowed_StillEncodesForbiddenChars_Extended()
+        {
+            // Arrange
+            var encoder = new JavaScriptStringEncoder(UnicodeBlocks.All);
+
+            // Act & assert - BMP chars, each encoded on its own
+            for (int codePoint = 0; codePoint <= 0xFFFF; codePoint++)
+            {
+                string input = ((char)codePoint).ToString();
+                string expected;
+
+                if (IsSurrogateCodePoint(codePoint))
+                {
+                    // unpaired surrogate -> Unicode replacement char
+                    expected = "\uFFFD";
+                }
+                else
+                {
+                    switch (codePoint)
+                    {
+                        // characters with a dedicated two-character escape
+                        case '\b':
+                            expected = @"\b";
+                            break;
+                        case '\t':
+                            expected = @"\t";
+                            break;
+                        case '\n':
+                            expected = @"\n";
+                            break;
+                        case '\f':
+                            expected = @"\f";
+                            break;
+                        case '\r':
+                            expected = @"\r";
+                            break;
+                        case '\\':
+                            expected = @"\\";
+                            break;
+                        case '/':
+                            expected = @"\/";
+                            break;
+
+                        default:
+                            // HTML-sensitive characters, the quotes and plus are always \u-escaped
+                            bool mustEncode =
+                                codePoint == '<' || codePoint == '>' || codePoint == '&'
+                                || codePoint == '\"' || codePoint == '\'' || codePoint == '+';
+
+                            bool isControlChar = codePoint <= 0x001F || (0x007F <= codePoint && codePoint <= 0x9F);
+                            if (isControlChar || !UnicodeHelpers.IsCharacterDefined((char)codePoint))
+                            {
+                                mustEncode = true; // control char, or undefined (or otherwise disallowed) char
+                            }
+
+                            expected = mustEncode
+                                ? String.Format(CultureInfo.InvariantCulture, @"\u{0:X4}", codePoint)
+                                : input; // no encoding
+                            break;
+                    }
+                }
+
+                Assert.Equal(expected, encoder.JavaScriptStringEncode(input));
+            }
+
+            // Act & assert - astral chars are written as two \u escapes, one per UTF-16 code unit
+            for (int codePoint = 0x10000; codePoint <= 0x10FFFF; codePoint++)
+            {
+                string input = Char.ConvertFromUtf32(codePoint);
+                uint firstCodeUnit = input[0];
+                uint secondCodeUnit = input[1];
+                string expected = String.Format(CultureInfo.InvariantCulture, @"\u{0:X4}\u{1:X4}", firstCodeUnit, secondCodeUnit);
+
+                Assert.Equal(expected, encoder.JavaScriptStringEncode(input));
+            }
+        }
+
+        [Fact]
+        public void JavaScriptStringEncode_BadSurrogates_ReturnsUnicodeReplacementChar()
+        {
+            // Arrange
+            // Input layout:
+            // "a<unpaired leading>b<unpaired trailing>c<trailing before leading>d<unpaired trailing><valid>e<high at end of string>"
+            const string input = "a\uD800b\uDFFFc\uDFFF\uD800d\uDFFF\uD800\uDFFFe\uD800";
+            // the valid 'D800' 'DFFF' pair is preserved (as two \u escapes); everything else becomes U+FFFD
+            const string expected = "a\uFFFDb\uFFFDc\uFFFD\uFFFDd\uFFFD\\uD800\\uDFFFe\uFFFD";
+            var permissiveEncoder = new JavaScriptStringEncoder(UnicodeBlocks.All); // allow all codepoints
+
+            // Act
+            string actual = permissiveEncoder.JavaScriptStringEncode(input);
+
+            // Assert
+            Assert.Equal(expected, actual);
+        }
+
+        [Fact]
+        public void JavaScriptStringEncode_EmptyStringInput_ReturnsEmptyString()
+        {
+            // Act
+            string encoded = new JavaScriptStringEncoder().JavaScriptStringEncode("");
+
+            // Assert
+            Assert.Equal("", encoded);
+        }
+
+        [Fact]
+        public void JavaScriptStringEncode_InputDoesNotRequireEncoding_ReturnsOriginalStringInstance()
+        {
+            // Arrange
+            string original = "Hello, there!";
+            var encoder = new JavaScriptStringEncoder();
+
+            // Act
+            string encoded = encoder.JavaScriptStringEncode(original);
+
+            // Assert: reference equality, not just equal content
+            Assert.Same(original, encoded);
+        }
+
+        [Fact]
+        public void JavaScriptStringEncode_NullInput_ReturnsNull()
+        {
+            // Act
+            string encoded = new JavaScriptStringEncoder().JavaScriptStringEncode(null);
+
+            // Assert
+            Assert.Null(encoded);
+        }
+
+        [Fact]
+        public void JavaScriptStringEncode_WithCharsRequiringEncodingAtBeginning()
+        {
+            // Arrange
+            var encoder = new JavaScriptStringEncoder();
+
+            // Act
+            string encoded = encoder.JavaScriptStringEncode("&Hello, there!");
+
+            // Assert
+            Assert.Equal(@"\u0026Hello, there!", encoded);
+        }
+
+        [Fact]
+        public void JavaScriptStringEncode_WithCharsRequiringEncodingAtEnd()
+        {
+            // Arrange
+            var encoder = new JavaScriptStringEncoder();
+
+            // Act
+            string encoded = encoder.JavaScriptStringEncode("Hello, there!&");
+
+            // Assert
+            Assert.Equal(@"Hello, there!\u0026", encoded);
+        }
+
+        [Fact]
+        public void JavaScriptStringEncode_WithCharsRequiringEncodingInMiddle()
+        {
+            // Arrange
+            var encoder = new JavaScriptStringEncoder();
+
+            // Act
+            string encoded = encoder.JavaScriptStringEncode("Hello, &there!");
+
+            // Assert
+            Assert.Equal(@"Hello, \u0026there!", encoded);
+        }
+
+        [Fact]
+        public void JavaScriptStringEncode_WithCharsRequiringEncodingInterspersed()
+        {
+            // Arrange
+            var encoder = new JavaScriptStringEncoder();
+
+            // Act
+            string encoded = encoder.JavaScriptStringEncode("Hello, <there>!");
+
+            // Assert
+            Assert.Equal(@"Hello, \u003Cthere\u003E!", encoded);
+        }
+
+        [Fact]
+        public void JavaScriptStringEncode_CharArray()
+        {
+            // Arrange
+            var encoder = new JavaScriptStringEncoder();
+            var writer = new StringWriter();
+            char[] source = "Hello+world!".ToCharArray();
+
+            // Act: encode only the five characters starting at index 3
+            encoder.JavaScriptStringEncode(source, 3, 5, writer);
+            string written = writer.ToString();
+
+            // Assert
+            Assert.Equal(@"lo\u002Bwo", written);
+        }
+
+        [Fact]
+        public void JavaScriptStringEncode_StringSubstring()
+        {
+            // Arrange
+            var encoder = new JavaScriptStringEncoder();
+            var writer = new StringWriter();
+
+            // Act: encode only the five characters starting at index 3
+            encoder.JavaScriptStringEncode("Hello+world!", 3, 5, writer);
+            string written = writer.ToString();
+
+            // Assert
+            Assert.Equal(@"lo\u002Bwo", written);
+        }
+
+        [Theory]
+        [InlineData("\"", @"\u0022")]
+        [InlineData("'", @"\u0027")]
+        public void JavaScriptStringEncode_Quotes(string input, string expected)
+        {
+            // Per the design document, the encoders never emit the ' or " characters,
+            // as additional defense-in-depth against breaking out of HTML attributes.
+            // Both quotes are therefore expected as \u escapes rather than \' and \".
+
+            // Arrange
+            var permissiveEncoder = new JavaScriptStringEncoder(UnicodeBlocks.All);
+
+            // Act
+            string actual = permissiveEncoder.JavaScriptStringEncode(input);
+
+            // Assert
+            Assert.Equal(expected, actual);
+        }
+
+        [Fact]
+        public void JavaScriptStringEncode_DoesNotOutputHtmlSensitiveCharacters()
+        {
+            // Per the design document, we provide additional defense-in-depth
+            // by never emitting HTML-sensitive characters unescaped.
+
+            // Arrange
+            var jsEncoder = new JavaScriptStringEncoder(UnicodeBlocks.All);
+            var htmlEncoder = new HtmlEncoder(UnicodeBlocks.All);
+
+            // Act & assert
+            for (int codePoint = 0; codePoint <= 0x10FFFF; codePoint++)
+            {
+                // surrogates don't matter here
+                if (!IsSurrogateCodePoint(codePoint))
+                {
+                    string jsEncoded = jsEncoder.JavaScriptStringEncode(Char.ConvertFromUtf32(codePoint));
+                    string jsThenHtmlEncoded = htmlEncoder.HtmlEncode(jsEncoded);
+
+                    // HTML-encoding must be a no-op, i.e. the JS output held no HTML-sensitive characters
+                    Assert.Equal(jsEncoded, jsThenHtmlEncoded);
+                }
+            }
+        }
+
+        // True when the value lies in the UTF-16 surrogate range U+D800..U+DFFF (inclusive).
+        private static bool IsSurrogateCodePoint(int codePoint)
+        {
+            if (codePoint < 0xD800)
+            {
+                return false;
+            }
+            return codePoint <= 0xDFFF;
+        }
+    }
+}
--- a/test/Microsoft.Framework.WebEncoders.Tests/Microsoft.Framework.WebEncoders.Tests.kproj
+++ b/test/Microsoft.Framework.WebEncoders.Tests/Microsoft.Framework.WebEncoders.Tests.kproj
@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <!-- Fall back to Visual Studio 14.0 and its tools folder when the environment has not set these properties. -->
+  <PropertyGroup>
+    <VisualStudioVersion Condition="'$(VisualStudioVersion)' == ''">14.0</VisualStudioVersion>
+    <VSToolsPath Condition="'$(VSToolsPath)' == ''">$(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion)</VSToolsPath>
+  </PropertyGroup>
+  <!-- Import the ASP.NET props file; skipped when VSToolsPath is empty. -->
+  <Import Project="$(VSToolsPath)\AspNet\Microsoft.Web.AspNet.Props" Condition="'$(VSToolsPath)' != ''" />
+  <!-- Project GUID, plus default obj/bin locations under the artifacts folder two directories up. -->
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>7ae2731d-43cd-4cf8-850a-4914de2ce930</ProjectGuid>
+    <BaseIntermediateOutputPath Condition="'$(BaseIntermediateOutputPath)'=='' ">..\..\artifacts\obj\$(MSBuildProjectName)</BaseIntermediateOutputPath>
+    <OutputPath Condition="'$(OutputPath)'=='' ">..\..\artifacts\bin\$(MSBuildProjectName)\</OutputPath>
+  </PropertyGroup>
+  <PropertyGroup>
+    <SchemaVersion>2.0</SchemaVersion>
+  </PropertyGroup>
+  <!-- Import the matching ASP.NET targets file under the same condition as the props import. -->
+  <Import Project="$(VSToolsPath)\AspNet\Microsoft.Web.AspNet.targets" Condition="'$(VSToolsPath)' != ''" />
+</Project>
--- a/test/Microsoft.Framework.WebEncoders.Tests/ParsedEntity.cs
+++ b/test/Microsoft.Framework.WebEncoders.Tests/ParsedEntity.cs
@ -0,0 +1,17 @@
+// Copyright (c) Microsoft Open Technologies, Inc. All rights reserved.
+// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
+
+using System;
+using Newtonsoft.Json;
+
+namespace Microsoft.Framework.WebEncoders
+{
+    /// <summary>
+    /// Deserialization target for a single entry of the entities JSON data:
+    /// the value object stored under each entity key.
+    /// </summary>
+    internal sealed class ParsedEntity
+    {
+        /// <summary>
+        /// Values of the entry's "codepoints" JSON array.
+        /// </summary>
+        [JsonProperty("codepoints")]
+        public int[] Codepoints { get; set; }
+
+        /// <summary>
+        /// Value of the entry's "characters" JSON string
+        /// (presumably the text the entity decodes to; confirm against the data file).
+        /// </summary>
+        [JsonProperty("characters")]
+        public string DecodedString { get; set; }
+    }
+}
--- a/test/Microsoft.Framework.WebEncoders.Tests/UnicodeBlockTests.cs
+++ b/test/Microsoft.Framework.WebEncoders.Tests/UnicodeBlockTests.cs
@ -0,0 +1,86 @@
+// Copyright (c) Microsoft Open Technologies, Inc. All rights reserved.
+// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
+
+using System;
+using System.Globalization;
+using System.IO;
+using System.Linq;
+using System.Text;
+using Xunit;
+
+namespace Microsoft.Framework.WebEncoders
+{
+    public class UnicodeBlockTests
+    {
+        [Theory]
+        [InlineData(-1, 16)]
+        [InlineData(1, 16)]
+        [InlineData(0x10000, 16)]
+        public void Ctor_FailureCase_FirstCodePoint(int firstCodePoint, int blockSize)
+        {
+            // Act
+            ArgumentOutOfRangeException thrown = Assert.Throws<ArgumentOutOfRangeException>(
+                () => new UnicodeBlock(firstCodePoint, blockSize));
+
+            // Assert: the exception must name the offending constructor argument
+            Assert.Equal("firstCodePoint", thrown.ParamName);
+        }
+
+        [Theory]
+        [InlineData(0x0100, -1)]
+        [InlineData(0x0100, 15)]
+        [InlineData(0x0100, 0x10000)]
+        public void Ctor_FailureCase_BlockSize(int firstCodePoint, int blockSize)
+        {
+            // Act
+            ArgumentOutOfRangeException thrown = Assert.Throws<ArgumentOutOfRangeException>(
+                () => new UnicodeBlock(firstCodePoint, blockSize));
+
+            // Assert: the exception must name the offending constructor argument
+            Assert.Equal("blockSize", thrown.ParamName);
+        }
+
+        [Fact]
+        public void Ctor_SuccessCase()
+        {
+            // Act
+            UnicodeBlock latinExtendedA = new UnicodeBlock(0x0100, 128); // Latin Extended-A
+
+            // Assert: both constructor arguments are exposed unchanged
+            int actualFirstCodePoint = latinExtendedA.FirstCodePoint;
+            int actualBlockSize = latinExtendedA.BlockSize;
+            Assert.Equal(0x0100, actualFirstCodePoint);
+            Assert.Equal(128, actualBlockSize);
+        }
+
+        [Theory]
+        [InlineData('\u0001', '\u0002')]
+        public void FromCharacterRange_FailureCases_FirstChar(char firstChar, char lastChar)
+        {
+            // Act
+            ArgumentOutOfRangeException thrown = Assert.Throws<ArgumentOutOfRangeException>(
+                () => UnicodeBlock.FromCharacterRange(firstChar, lastChar));
+
+            // Assert: the exception must name the offending argument
+            Assert.Equal("firstChar", thrown.ParamName);
+        }
+
+        [Theory]
+        [InlineData('\u0100', '\u007F')]
+        [InlineData('\u0100', '\u0100')]
+        [InlineData('\u0100', '\u010E')]
+        public void FromCharacterRange_FailureCases_LastChar(char firstChar, char lastChar)
+        {
+            // Act
+            ArgumentOutOfRangeException thrown = Assert.Throws<ArgumentOutOfRangeException>(
+                () => UnicodeBlock.FromCharacterRange(firstChar, lastChar));
+
+            // Assert: the exception must name the offending argument
+            Assert.Equal("lastChar", thrown.ParamName);
+        }
+
+        [Fact]
+        public void FromCharacterRange_SuccessCase()
+        {
+            // Act
+            UnicodeBlock latinExtendedB = UnicodeBlock.FromCharacterRange('\u0180', '\u024F'); // Latin Extended-B
+
+            // Assert: U+0180..U+024F inclusive is 208 code points
+            int actualFirstCodePoint = latinExtendedB.FirstCodePoint;
+            int actualBlockSize = latinExtendedB.BlockSize;
+            Assert.Equal(0x0180, actualFirstCodePoint);
+            Assert.Equal(208, actualBlockSize);
+        }
+
+        [Fact]
+        public void FromCharacterRange_SuccessCase_All()
+        {
+            // Act: the widest range a pair of chars can describe
+            UnicodeBlock everything = UnicodeBlock.FromCharacterRange('\u0000', '\uFFFF');
+
+            // Assert
+            int actualFirstCodePoint = everything.FirstCodePoint;
+            int actualBlockSize = everything.BlockSize;
+            Assert.Equal(0, actualFirstCodePoint);
+            Assert.Equal(0x10000, actualBlockSize);
+        }
+    }
+}
--- a/test/Microsoft.Framework.WebEncoders.Tests/UnicodeBlocksTests.cs
+++ b/test/Microsoft.Framework.WebEncoders.Tests/UnicodeBlocksTests.cs
@ -0,0 +1,210 @@
+// Copyright (c) Microsoft Open Technologies, Inc. All rights reserved.
+// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
+
+using System;
+using System.Reflection;
+using Xunit;
+
+namespace Microsoft.Framework.WebEncoders
+{
+    public class UnicodeBlocksTests
+    {
+        [Fact]
+        public void Block_None()
+        {
+            // Act: first read of the property.
+            UnicodeBlock firstRead = UnicodeBlocks.None;
+            Assert.NotNull(firstRead);
+
+            // Test 1: the "none" block starts at zero and contains no code points.
+            Assert.Equal(0, firstRead.FirstCodePoint);
+            Assert.Equal(0, firstRead.BlockSize);
+
+            // Test 2: a second read must hand back the very same instance (reference equality).
+            UnicodeBlock secondRead = UnicodeBlocks.None;
+            Assert.Same(firstRead, secondRead);
+        }
+
+        [Fact]
+        public void Block_All()
+        {
+            // UnicodeBlocks.All is verified with the same routine as the named blocks,
+            // using the range U+0000..U+FFFF.
+            const char first = '\u0000';
+            const char last = '\uFFFF';
+            Block_Unicode(first, last, nameof(UnicodeBlocks.All));
+        }
+
+        [Theory]
+        [InlineData('\u0000', '\u007F', nameof(UnicodeBlocks.BasicLatin))]
+        [InlineData('\u0080', '\u00FF', nameof(UnicodeBlocks.Latin1Supplement))]
+        [InlineData('\u0100', '\u017F', nameof(UnicodeBlocks.LatinExtendedA))]
+        [InlineData('\u0180', '\u024F', nameof(UnicodeBlocks.LatinExtendedB))]
+        [InlineData('\u0250', '\u02AF', nameof(UnicodeBlocks.IPAExtensions))]
+        [InlineData('\u02B0', '\u02FF', nameof(UnicodeBlocks.SpacingModifierLetters))]
+        [InlineData('\u0300', '\u036F', nameof(UnicodeBlocks.CombiningDiacriticalMarks))]
+        [InlineData('\u0370', '\u03FF', nameof(UnicodeBlocks.GreekandCoptic))]
+        [InlineData('\u0400', '\u04FF', nameof(UnicodeBlocks.Cyrillic))]
+        [InlineData('\u0500', '\u052F', nameof(UnicodeBlocks.CyrillicSupplement))]
+        [InlineData('\u0530', '\u058F', nameof(UnicodeBlocks.Armenian))]
+        [InlineData('\u0590', '\u05FF', nameof(UnicodeBlocks.Hebrew))]
+        [InlineData('\u0600', '\u06FF', nameof(UnicodeBlocks.Arabic))]
+        [InlineData('\u0700', '\u074F', nameof(UnicodeBlocks.Syriac))]
+        [InlineData('\u0750', '\u077F', nameof(UnicodeBlocks.ArabicSupplement))]
+        [InlineData('\u0780', '\u07BF', nameof(UnicodeBlocks.Thaana))]
+        [InlineData('\u07C0', '\u07FF', nameof(UnicodeBlocks.NKo))]
+        [InlineData('\u0800', '\u083F', nameof(UnicodeBlocks.Samaritan))]
+        [InlineData('\u0840', '\u085F', nameof(UnicodeBlocks.Mandaic))]
+        [InlineData('\u08A0', '\u08FF', nameof(UnicodeBlocks.ArabicExtendedA))]
+        [InlineData('\u0900', '\u097F', nameof(UnicodeBlocks.Devanagari))]
+        [InlineData('\u0980', '\u09FF', nameof(UnicodeBlocks.Bengali))]
+        [InlineData('\u0A00', '\u0A7F', nameof(UnicodeBlocks.Gurmukhi))]
+        [InlineData('\u0A80', '\u0AFF', nameof(UnicodeBlocks.Gujarati))]
+        [InlineData('\u0B00', '\u0B7F', nameof(UnicodeBlocks.Oriya))]
+        [InlineData('\u0B80', '\u0BFF', nameof(UnicodeBlocks.Tamil))]
+        [InlineData('\u0C00', '\u0C7F', nameof(UnicodeBlocks.Telugu))]
+        [InlineData('\u0C80', '\u0CFF', nameof(UnicodeBlocks.Kannada))]
+        [InlineData('\u0D00', '\u0D7F', nameof(UnicodeBlocks.Malayalam))]
+        [InlineData('\u0D80', '\u0DFF', nameof(UnicodeBlocks.Sinhala))]
+        [InlineData('\u0E00', '\u0E7F', nameof(UnicodeBlocks.Thai))]
+        [InlineData('\u0E80', '\u0EFF', nameof(UnicodeBlocks.Lao))]
+        [InlineData('\u0F00', '\u0FFF', nameof(UnicodeBlocks.Tibetan))]
+        [InlineData('\u1000', '\u109F', nameof(UnicodeBlocks.Myanmar))]
+        [InlineData('\u10A0', '\u10FF', nameof(UnicodeBlocks.Georgian))]
+        [InlineData('\u1100', '\u11FF', nameof(UnicodeBlocks.HangulJamo))]
+        [InlineData('\u1200', '\u137F', nameof(UnicodeBlocks.Ethiopic))]
+        [InlineData('\u1380', '\u139F', nameof(UnicodeBlocks.EthiopicSupplement))]
+        [InlineData('\u13A0', '\u13FF', nameof(UnicodeBlocks.Cherokee))]
+        [InlineData('\u1400', '\u167F', nameof(UnicodeBlocks.UnifiedCanadianAboriginalSyllabics))]
+        [InlineData('\u1680', '\u169F', nameof(UnicodeBlocks.Ogham))]
+        [InlineData('\u16A0', '\u16FF', nameof(UnicodeBlocks.Runic))]
+        [InlineData('\u1700', '\u171F', nameof(UnicodeBlocks.Tagalog))]
+        [InlineData('\u1720', '\u173F', nameof(UnicodeBlocks.Hanunoo))]
+        [InlineData('\u1740', '\u175F', nameof(UnicodeBlocks.Buhid))]
+        [InlineData('\u1760', '\u177F', nameof(UnicodeBlocks.Tagbanwa))]
+        [InlineData('\u1780', '\u17FF', nameof(UnicodeBlocks.Khmer))]
+        [InlineData('\u1800', '\u18AF', nameof(UnicodeBlocks.Mongolian))]
+        [InlineData('\u18B0', '\u18FF', nameof(UnicodeBlocks.UnifiedCanadianAboriginalSyllabicsExtended))]
+        [InlineData('\u1900', '\u194F', nameof(UnicodeBlocks.Limbu))]
+        [InlineData('\u1950', '\u197F', nameof(UnicodeBlocks.TaiLe))]
+        [InlineData('\u1980', '\u19DF', nameof(UnicodeBlocks.NewTaiLue))]
+        [InlineData('\u19E0', '\u19FF', nameof(UnicodeBlocks.KhmerSymbols))]
+        [InlineData('\u1A00', '\u1A1F', nameof(UnicodeBlocks.Buginese))]
+        [InlineData('\u1A20', '\u1AAF', nameof(UnicodeBlocks.TaiTham))]
+        [InlineData('\u1AB0', '\u1AFF', nameof(UnicodeBlocks.CombiningDiacriticalMarksExtended))]
+        [InlineData('\u1B00', '\u1B7F', nameof(UnicodeBlocks.Balinese))]
+        [InlineData('\u1B80', '\u1BBF', nameof(UnicodeBlocks.Sundanese))]
+        [InlineData('\u1BC0', '\u1BFF', nameof(UnicodeBlocks.Batak))]
+        [InlineData('\u1C00', '\u1C4F', nameof(UnicodeBlocks.Lepcha))]
+        [InlineData('\u1C50', '\u1C7F', nameof(UnicodeBlocks.OlChiki))]
+        [InlineData('\u1CC0', '\u1CCF', nameof(UnicodeBlocks.SundaneseSupplement))]
+        [InlineData('\u1CD0', '\u1CFF', nameof(UnicodeBlocks.VedicExtensions))]
+        [InlineData('\u1D00', '\u1D7F', nameof(UnicodeBlocks.PhoneticExtensions))]
+        [InlineData('\u1D80', '\u1DBF', nameof(UnicodeBlocks.PhoneticExtensionsSupplement))]
+        [InlineData('\u1DC0', '\u1DFF', nameof(UnicodeBlocks.CombiningDiacriticalMarksSupplement))]
+        [InlineData('\u1E00', '\u1EFF', nameof(UnicodeBlocks.LatinExtendedAdditional))]
+        [InlineData('\u1F00', '\u1FFF', nameof(UnicodeBlocks.GreekExtended))]
+        [InlineData('\u2000', '\u206F', nameof(UnicodeBlocks.GeneralPunctuation))]
+        [InlineData('\u2070', '\u209F', nameof(UnicodeBlocks.SuperscriptsandSubscripts))]
+        [InlineData('\u20A0', '\u20CF', nameof(UnicodeBlocks.CurrencySymbols))]
+        [InlineData('\u20D0', '\u20FF', nameof(UnicodeBlocks.CombiningDiacriticalMarksforSymbols))]
+        [InlineData('\u2100', '\u214F', nameof(UnicodeBlocks.LetterlikeSymbols))]
+        [InlineData('\u2150', '\u218F', nameof(UnicodeBlocks.NumberForms))]
+        [InlineData('\u2190', '\u21FF', nameof(UnicodeBlocks.Arrows))]
+        [InlineData('\u2200', '\u22FF', nameof(UnicodeBlocks.MathematicalOperators))]
+        [InlineData('\u2300', '\u23FF', nameof(UnicodeBlocks.MiscellaneousTechnical))]
+        [InlineData('\u2400', '\u243F', nameof(UnicodeBlocks.ControlPictures))]
+        [InlineData('\u2440', '\u245F', nameof(UnicodeBlocks.OpticalCharacterRecognition))]
+        [InlineData('\u2460', '\u24FF', nameof(UnicodeBlocks.EnclosedAlphanumerics))]
+        [InlineData('\u2500', '\u257F', nameof(UnicodeBlocks.BoxDrawing))]
+        [InlineData('\u2580', '\u259F', nameof(UnicodeBlocks.BlockElements))]
+        [InlineData('\u25A0', '\u25FF', nameof(UnicodeBlocks.GeometricShapes))]
+        [InlineData('\u2600', '\u26FF', nameof(UnicodeBlocks.MiscellaneousSymbols))]
+        [InlineData('\u2700', '\u27BF', nameof(UnicodeBlocks.Dingbats))]
+        [InlineData('\u27C0', '\u27EF', nameof(UnicodeBlocks.MiscellaneousMathematicalSymbolsA))]
+        [InlineData('\u27F0', '\u27FF', nameof(UnicodeBlocks.SupplementalArrowsA))]
+        [InlineData('\u2800', '\u28FF', nameof(UnicodeBlocks.BraillePatterns))]
+        [InlineData('\u2900', '\u297F', nameof(UnicodeBlocks.SupplementalArrowsB))]
+        [InlineData('\u2980', '\u29FF', nameof(UnicodeBlocks.MiscellaneousMathematicalSymbolsB))]
+        [InlineData('\u2A00', '\u2AFF', nameof(UnicodeBlocks.SupplementalMathematicalOperators))]
+        [InlineData('\u2B00', '\u2BFF', nameof(UnicodeBlocks.MiscellaneousSymbolsandArrows))]
+        [InlineData('\u2C00', '\u2C5F', nameof(UnicodeBlocks.Glagolitic))]
+        [InlineData('\u2C60', '\u2C7F', nameof(UnicodeBlocks.LatinExtendedC))]
+        [InlineData('\u2C80', '\u2CFF', nameof(UnicodeBlocks.Coptic))]
+        [InlineData('\u2D00', '\u2D2F', nameof(UnicodeBlocks.GeorgianSupplement))]
+        [InlineData('\u2D30', '\u2D7F', nameof(UnicodeBlocks.Tifinagh))]
+        [InlineData('\u2D80', '\u2DDF', nameof(UnicodeBlocks.EthiopicExtended))]
+        [InlineData('\u2DE0', '\u2DFF', nameof(UnicodeBlocks.CyrillicExtendedA))]
+        [InlineData('\u2E00', '\u2E7F', nameof(UnicodeBlocks.SupplementalPunctuation))]
+        [InlineData('\u2E80', '\u2EFF', nameof(UnicodeBlocks.CJKRadicalsSupplement))]
+        [InlineData('\u2F00', '\u2FDF', nameof(UnicodeBlocks.KangxiRadicals))]
+        [InlineData('\u2FF0', '\u2FFF', nameof(UnicodeBlocks.IdeographicDescriptionCharacters))]
+        [InlineData('\u3000', '\u303F', nameof(UnicodeBlocks.CJKSymbolsandPunctuation))]
+        [InlineData('\u3040', '\u309F', nameof(UnicodeBlocks.Hiragana))]
+        [InlineData('\u30A0', '\u30FF', nameof(UnicodeBlocks.Katakana))]
+        [InlineData('\u3100', '\u312F', nameof(UnicodeBlocks.Bopomofo))]
+        [InlineData('\u3130', '\u318F', nameof(UnicodeBlocks.HangulCompatibilityJamo))]
+        [InlineData('\u3190', '\u319F', nameof(UnicodeBlocks.Kanbun))]
+        [InlineData('\u31A0', '\u31BF', nameof(UnicodeBlocks.BopomofoExtended))]
+        [InlineData('\u31C0', '\u31EF', nameof(UnicodeBlocks.CJKStrokes))]
+        [InlineData('\u31F0', '\u31FF', nameof(UnicodeBlocks.KatakanaPhoneticExtensions))]
+        [InlineData('\u3200', '\u32FF', nameof(UnicodeBlocks.EnclosedCJKLettersandMonths))]
+        [InlineData('\u3300', '\u33FF', nameof(UnicodeBlocks.CJKCompatibility))]
+        [InlineData('\u3400', '\u4DBF', nameof(UnicodeBlocks.CJKUnifiedIdeographsExtensionA))]
+        [InlineData('\u4DC0', '\u4DFF', nameof(UnicodeBlocks.YijingHexagramSymbols))]
+        [InlineData('\u4E00', '\u9FFF', nameof(UnicodeBlocks.CJKUnifiedIdeographs))]
+        [InlineData('\uA000', '\uA48F', nameof(UnicodeBlocks.YiSyllables))]
+        [InlineData('\uA490', '\uA4CF', nameof(UnicodeBlocks.YiRadicals))]
+        [InlineData('\uA4D0', '\uA4FF', nameof(UnicodeBlocks.Lisu))]
+        [InlineData('\uA500', '\uA63F', nameof(UnicodeBlocks.Vai))]
+        [InlineData('\uA640', '\uA69F', nameof(UnicodeBlocks.CyrillicExtendedB))]
+        [InlineData('\uA6A0', '\uA6FF', nameof(UnicodeBlocks.Bamum))]
+        [InlineData('\uA700', '\uA71F', nameof(UnicodeBlocks.ModifierToneLetters))]
+        [InlineData('\uA720', '\uA7FF', nameof(UnicodeBlocks.LatinExtendedD))]
+        [InlineData('\uA800', '\uA82F', nameof(UnicodeBlocks.SylotiNagri))]
+        [InlineData('\uA830', '\uA83F', nameof(UnicodeBlocks.CommonIndicNumberForms))]
+        [InlineData('\uA840', '\uA87F', nameof(UnicodeBlocks.Phagspa))]
+        [InlineData('\uA880', '\uA8DF', nameof(UnicodeBlocks.Saurashtra))]
+        [InlineData('\uA8E0', '\uA8FF', nameof(UnicodeBlocks.DevanagariExtended))]
+        [InlineData('\uA900', '\uA92F', nameof(UnicodeBlocks.KayahLi))]
+        [InlineData('\uA930', '\uA95F', nameof(UnicodeBlocks.Rejang))]
+        [InlineData('\uA960', '\uA97F', nameof(UnicodeBlocks.HangulJamoExtendedA))]
+        [InlineData('\uA980', '\uA9DF', nameof(UnicodeBlocks.Javanese))]
+        [InlineData('\uA9E0', '\uA9FF', nameof(UnicodeBlocks.MyanmarExtendedB))]
+        [InlineData('\uAA00', '\uAA5F', nameof(UnicodeBlocks.Cham))]
+        [InlineData('\uAA60', '\uAA7F', nameof(UnicodeBlocks.MyanmarExtendedA))]
+        [InlineData('\uAA80', '\uAADF', nameof(UnicodeBlocks.TaiViet))]
+        [InlineData('\uAAE0', '\uAAFF', nameof(UnicodeBlocks.MeeteiMayekExtensions))]
+        [InlineData('\uAB00', '\uAB2F', nameof(UnicodeBlocks.EthiopicExtendedA))]
+        [InlineData('\uAB30', '\uAB6F', nameof(UnicodeBlocks.LatinExtendedE))]
+        [InlineData('\uABC0', '\uABFF', nameof(UnicodeBlocks.MeeteiMayek))]
+        [InlineData('\uAC00', '\uD7AF', nameof(UnicodeBlocks.HangulSyllables))]
+        [InlineData('\uD7B0', '\uD7FF', nameof(UnicodeBlocks.HangulJamoExtendedB))]
+        [InlineData('\uF900', '\uFAFF', nameof(UnicodeBlocks.CJKCompatibilityIdeographs))]
+        [InlineData('\uFB00', '\uFB4F', nameof(UnicodeBlocks.AlphabeticPresentationForms))]
+        [InlineData('\uFB50', '\uFDFF', nameof(UnicodeBlocks.ArabicPresentationFormsA))]
+        [InlineData('\uFE00', '\uFE0F', nameof(UnicodeBlocks.VariationSelectors))]
+        [InlineData('\uFE10', '\uFE1F', nameof(UnicodeBlocks.VerticalForms))]
+        [InlineData('\uFE20', '\uFE2F', nameof(UnicodeBlocks.CombiningHalfMarks))]
+        [InlineData('\uFE30', '\uFE4F', nameof(UnicodeBlocks.CJKCompatibilityForms))]
+        [InlineData('\uFE50', '\uFE6F', nameof(UnicodeBlocks.SmallFormVariants))]
+        [InlineData('\uFE70', '\uFEFF', nameof(UnicodeBlocks.ArabicPresentationFormsB))]
+        [InlineData('\uFF00', '\uFFEF', nameof(UnicodeBlocks.HalfwidthandFullwidthForms))]
+        [InlineData('\uFFF0', '\uFFFF', nameof(UnicodeBlocks.Specials))]
+        public void Block_Unicode(char first, char last, string blockName)
+        {
+            // Sanity-check the test data itself before touching the property under test.
+            int firstLowNibble = first & 0xF;
+            int lastLowNibble = last & 0xF;
+            Assert.Equal(0x0, firstLowNibble); // first char in any block should be U+nnn0
+            Assert.Equal(0xF, lastLowNibble); // last char in any block should be U+nnnF
+            Assert.True(first < last); // code point ranges should be ordered
+
+            // Resolve the block by name: it must be a public static property on UnicodeBlocks.
+            PropertyInfo blockProperty = typeof(UnicodeBlocks).GetProperty(blockName, BindingFlags.Public | BindingFlags.Static);
+            Assert.NotNull(blockProperty);
+
+            UnicodeBlock firstRead = (UnicodeBlock)blockProperty.GetValue(null);
+            Assert.NotNull(firstRead);
+
+            // Test 1: the block should span the range first..last
+            Assert.Equal(first, firstRead.FirstCodePoint);
+            int lastCodePointInBlock = firstRead.FirstCodePoint + firstRead.BlockSize - 1;
+            Assert.Equal(last, lastCodePointInBlock);
+
+            // Test 2: reading the property again must return the same cached instance
+            UnicodeBlock secondRead = (UnicodeBlock)blockProperty.GetValue(null);
+            Assert.Same(firstRead, secondRead);
+        }
+    }
+}
--- a/test/Microsoft.Framework.WebEncoders.Tests/UnicodeEncoderBaseTests.cs
+++ b/test/Microsoft.Framework.WebEncoders.Tests/UnicodeEncoderBaseTests.cs
@ -0,0 +1,406 @@
+// Copyright (c) Microsoft Open Technologies, Inc. All rights reserved.
+// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
+
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.IO;
+using Moq;
+using Xunit;
+
+namespace Microsoft.Framework.WebEncoders
+{
+    public class UnicodeEncoderBaseTests
+    {
+        [Fact]
+        public void Ctor_WithCustomFilters()
+        {
+            // Arrange: start from an empty allow-list, then explicitly allow
+            // 'a', 'b', NUL, '&', U+FFFF and 'd'.
+            var emptyFilter = new CodePointFilter(UnicodeBlocks.None);
+            var lettersAllowed = emptyFilter.AllowChars("ab");
+            var filter = lettersAllowed.AllowChars('\0', '&', '\uFFFF', 'd');
+            UnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(filter);
+
+            // Act & assert: ordinary characters pass through only when they were allowed.
+            Assert.Equal("a", encoder.Encode("a"));
+            Assert.Equal("b", encoder.Encode("b"));
+            Assert.Equal("[U+0063]", encoder.Encode("c")); // 'c' was never allowed
+            Assert.Equal("d", encoder.Encode("d"));
+
+            // Act & assert: these were allowed by the filter but are expected to be escaped anyway.
+            Assert.Equal("[U+0000]", encoder.Encode("\0")); // control chars are always encoded
+            Assert.Equal("[U+0026]", encoder.Encode("&")); // HTML-special chars are always encoded
+            Assert.Equal("[U+FFFF]", encoder.Encode("\uFFFF")); // non-chars and other forbidden chars are always encoded
+        }
+
+        [Fact]
+        public void Ctor_WithUnicodeBlocks()
+        {
+            // Arrange: only Latin-1 Supplement and Miscellaneous Symbols are allowed.
+            var filter = new CodePointFilter(UnicodeBlocks.Latin1Supplement, UnicodeBlocks.MiscellaneousSymbols);
+            UnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(filter);
+
+            // Act & assert
+            Assert.Equal("[U+0061]", encoder.Encode("a")); // 'a' lies outside both allowed blocks
+            Assert.Equal("\u00E9", encoder.Encode("\u00E9")); // LATIN SMALL LETTER E WITH ACUTE passes through
+            Assert.Equal("\u2601", encoder.Encode("\u2601")); // CLOUD passes through
+        }
+
+        [Fact]
+        public void Encode_AllRangesAllowed_StillEncodesForbiddenChars_Simple()
+        {
+            // Arrange: every block is allowed, yet < > & ' " + are still expected to be escaped.
+            const string input = "Hello <>&\'\"+ there!";
+            const string expected = "Hello [U+003C][U+003E][U+0026][U+0027][U+0022][U+002B] there!";
+            UnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeBlocks.All);
+
+            // Act
+            string actual = encoder.Encode(input);
+
+            // Assert
+            Assert.Equal(expected, actual);
+        }
+
+        [Fact]
+        public void Encode_AllRangesAllowed_StillEncodesForbiddenChars_Extended()
+        {
+            // Arrange: every block is allowed, so anything escaped below is escaped
+            // regardless of the allow-list.
+            UnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeBlocks.All);
+
+            // Act & assert - BMP chars
+            for (int codePoint = 0; codePoint <= 0xFFFF; codePoint++)
+            {
+                char ch = (char)codePoint;
+                string input = new String(ch, 1);
+
+                string expected;
+                if (IsSurrogateCodePoint(codePoint))
+                {
+                    // A single-char string can only hold a lone surrogate.
+                    expected = "\uFFFD"; // unpaired surrogate -> Unicode replacement char
+                }
+                else
+                {
+                    // U+0000..U+001F and U+007F..U+009F are treated as control chars.
+                    bool isControlChar = codePoint <= 0x001F || (0x007F <= codePoint && codePoint <= 0x9F);
+
+                    // The definedness lookup is consulted for every non-control char;
+                    // the explicit list covers the chars that are escaped even though they are defined.
+                    bool mustEncode = isControlChar
+                        || !UnicodeHelpers.IsCharacterDefined(ch) // undefined (or otherwise disallowed) char
+                        || ch == '<'
+                        || ch == '>'
+                        || ch == '&'
+                        || ch == '\"'
+                        || ch == '\''
+                        || ch == '+';
+
+                    expected = mustEncode
+                        ? String.Format(CultureInfo.InvariantCulture, "[U+{0:X4}]", codePoint)
+                        : input; // no encoding
+                }
+
+                Assert.Equal(expected, encoder.Encode(input));
+            }
+
+            // Act & assert - astral chars: every supplementary scalar is expected to be escaped
+            for (int scalar = 0x10000; scalar <= 0x10FFFF; scalar++)
+            {
+                string surrogatePair = Char.ConvertFromUtf32(scalar);
+                string escaped = String.Format(CultureInfo.InvariantCulture, "[U+{0:X}]", scalar);
+                Assert.Equal(escaped, encoder.Encode(surrogatePair));
+            }
+        }
+
+        [Fact]
+        public void Encode_BadSurrogates_ReturnsUnicodeReplacementChar()
+        {
+            // Arrange
+            // Input layout, in order:
+            //   a <unpaired leading> b <unpaired trailing> c <trailing before leading> d
+            //   <unpaired trailing> <valid pair> e <leading surrogate at end of string>
+            const string input = "a\uD800b\uDFFFc\uDFFF\uD800d\uDFFF\uD800\uDFFFe\uD800";
+
+            // Each broken surrogate becomes U+FFFD; the one valid pair is escaped as a single scalar.
+            const string expected = "a\uFFFDb\uFFFDc\uFFFD\uFFFDd\uFFFD[U+103FF]e\uFFFD";
+
+            UnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeBlocks.All); // allow all codepoints
+
+            // Act
+            string actual = encoder.Encode(input);
+
+            // Assert
+            Assert.Equal(expected, actual);
+        }
+
+        [Fact]
+        public void Encode_EmptyStringInput_ReturnsEmptyString()
+        {
+            // Arrange
+            UnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeBlocks.All);
+
+            // Act
+            string actual = encoder.Encode("");
+
+            // Assert: nothing in, nothing out.
+            Assert.Equal("", actual);
+        }
+
+        [Fact]
+        public void Encode_InputDoesNotRequireEncoding_ReturnsOriginalStringInstance()
+        {
+            // Arrange: none of these characters needs escaping when all blocks are allowed.
+            string input = "Hello, there!";
+            UnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeBlocks.All);
+
+            // Act
+            string actual = encoder.Encode(input);
+
+            // Assert: reference equality - the same string object must come back, not a copy.
+            Assert.Same(input, actual);
+        }
+
+        [Fact]
+        public void Encode_NullInput_ReturnsNull()
+        {
+            // Arrange
+            UnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeBlocks.All);
+
+            // Act
+            string actual = encoder.Encode(null);
+
+            // Assert: null is passed through rather than throwing.
+            Assert.Null(actual);
+        }
+
+        [Fact]
+        public void Encode_WithCharsRequiringEncodingAtBeginning()
+        {
+            // Arrange: the only character needing escaping is the very first one.
+            CustomUnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeBlocks.All);
+
+            // Act
+            string actual = encoder.Encode("&Hello, there!");
+
+            // Assert
+            Assert.Equal("[U+0026]Hello, there!", actual);
+        }
+
+        [Fact]
+        public void Encode_WithCharsRequiringEncodingAtEnd()
+        {
+            // Arrange: the only character needing escaping is the very last one.
+            CustomUnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeBlocks.All);
+
+            // Act
+            string actual = encoder.Encode("Hello, there!&");
+
+            // Assert
+            Assert.Equal("Hello, there![U+0026]", actual);
+        }
+
+        [Fact]
+        public void Encode_WithCharsRequiringEncodingInMiddle()
+        {
+            // Arrange: the only character needing escaping sits between two untouched runs.
+            CustomUnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeBlocks.All);
+
+            // Act
+            string actual = encoder.Encode("Hello, &there!");
+
+            // Assert
+            Assert.Equal("Hello, [U+0026]there!", actual);
+        }
+
+        [Fact]
+        public void Encode_WithCharsRequiringEncodingInterspersed()
+        {
+            // Arrange: two characters needing escaping, separated by text that does not.
+            CustomUnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeBlocks.All);
+
+            // Act
+            string actual = encoder.Encode("Hello, <there>!");
+
+            // Assert
+            Assert.Equal("Hello, [U+003C]there[U+003E]!", actual);
+        }
+
+        [Fact]
+        public void Encode_CharArray_ParameterChecking_NegativeTestCases()
+        {
+            // Arrange: a three-element buffer, so the valid (start, count) windows lie within 0..3.
+            CustomUnicodeEncoderBase encoder = new CustomUnicodeEncoderBase();
+            char[] nullBuffer = null;
+            char[] abc = "abc".ToCharArray();
+
+            // Act & assert: null buffer, then null writer
+            Assert.Throws<ArgumentNullException>(() => encoder.Encode(nullBuffer, 0, 0, new StringWriter()));
+            Assert.Throws<ArgumentNullException>(() => encoder.Encode(abc, 0, 3, null));
+
+            // Act & assert: (start, count) windows that do not fit inside the buffer
+            Assert.Throws<ArgumentOutOfRangeException>(() => encoder.Encode(abc, -1, 2, new StringWriter())); // negative start
+            Assert.Throws<ArgumentOutOfRangeException>(() => encoder.Encode(abc, 2, 2, new StringWriter())); // runs one past the end
+            Assert.Throws<ArgumentOutOfRangeException>(() => encoder.Encode(abc, 4, 0, new StringWriter())); // start beyond the end
+            Assert.Throws<ArgumentOutOfRangeException>(() => encoder.Encode(abc, 2, -1, new StringWriter())); // negative count
+            Assert.Throws<ArgumentOutOfRangeException>(() => encoder.Encode(abc, 1, 3, new StringWriter())); // runs one past the end
+        }
+
+        [Fact]
+        public void Encode_CharArray_ZeroCount_DoesNotCallIntoTextWriter()
+        {
+            // Arrange: a strict mock with no setups throws on any member call,
+            // so merely touching the writer fails the test.
+            var strictWriterMock = new Mock<TextWriter>(MockBehavior.Strict);
+            TextWriter output = strictWriterMock.Object;
+            CustomUnicodeEncoderBase encoder = new CustomUnicodeEncoderBase();
+            char[] buffer = "abc".ToCharArray();
+
+            // Act: request zero chars starting at index 2.
+            encoder.Encode(buffer, 2, 0, output);
+
+            // Assert
+            // Reaching this point without the strict mock throwing is the success condition.
+        }
+
+        [Fact]
+        public void Encode_CharArray_AllCharsValid()
+        {
+            // Arrange: the window (start 4, count 2) selects "xy", skipping the '&' at index 3.
+            char[] buffer = "abc&xyz".ToCharArray();
+            var encoder = new CustomUnicodeEncoderBase(UnicodeBlocks.All);
+            var output = new StringWriter();
+
+            // Act
+            encoder.Encode(buffer, 4, 2, output);
+
+            // Assert: both chars are written unescaped.
+            string written = output.ToString();
+            Assert.Equal("xy", written);
+        }
+
+        [Fact]
+        public void Encode_CharArray_AllCharsInvalid()
+        {
+            // Arrange: no blocks are allowed, so every char in the window must be escaped.
+            char[] buffer = "abc&xyz".ToCharArray();
+            var encoder = new CustomUnicodeEncoderBase(UnicodeBlocks.None);
+            var output = new StringWriter();
+
+            // Act: the window (start 4, count 2) selects "xy".
+            encoder.Encode(buffer, 4, 2, output);
+
+            // Assert
+            string written = output.ToString();
+            Assert.Equal("[U+0078][U+0079]", written);
+        }
+
+        [Fact]
+        public void Encode_CharArray_SomeCharsValid()
+        {
+            // Arrange: the window (start 2, count 3) selects "c&x" - only the '&' needs escaping.
+            char[] buffer = "abc&xyz".ToCharArray();
+            var encoder = new CustomUnicodeEncoderBase(UnicodeBlocks.All);
+            var output = new StringWriter();
+
+            // Act
+            encoder.Encode(buffer, 2, 3, output);
+
+            // Assert
+            string written = output.ToString();
+            Assert.Equal("c[U+0026]x", written);
+        }
+
+        [Fact]
+        public void Encode_StringSubstring_ParameterChecking_NegativeTestCases()
+        {
+            // Arrange: a three-char string, so the valid (start, count) windows lie within 0..3.
+            CustomUnicodeEncoderBase encoder = new CustomUnicodeEncoderBase();
+            string nullText = null;
+            const string abc = "abc";
+
+            // Act & assert: null string, then null writer
+            Assert.Throws<ArgumentNullException>(() => encoder.Encode(nullText, 0, 0, new StringWriter()));
+            Assert.Throws<ArgumentNullException>(() => encoder.Encode(abc, 0, 3, null));
+
+            // Act & assert: (start, count) windows that do not fit inside the string
+            Assert.Throws<ArgumentOutOfRangeException>(() => encoder.Encode(abc, -1, 2, new StringWriter())); // negative start
+            Assert.Throws<ArgumentOutOfRangeException>(() => encoder.Encode(abc, 2, 2, new StringWriter())); // runs one past the end
+            Assert.Throws<ArgumentOutOfRangeException>(() => encoder.Encode(abc, 4, 0, new StringWriter())); // start beyond the end
+            Assert.Throws<ArgumentOutOfRangeException>(() => encoder.Encode(abc, 2, -1, new StringWriter())); // negative count
+            Assert.Throws<ArgumentOutOfRangeException>(() => encoder.Encode(abc, 1, 3, new StringWriter())); // runs one past the end
+        }
+
+        [Fact]
+        public void Encode_StringSubstring_ZeroCount_DoesNotCallIntoTextWriter()
+        {
+            // Arrange: a strict mock with no setups throws on any member call,
+            // so merely touching the writer fails the test.
+            var strictWriterMock = new Mock<TextWriter>(MockBehavior.Strict);
+            TextWriter output = strictWriterMock.Object;
+            CustomUnicodeEncoderBase encoder = new CustomUnicodeEncoderBase();
+
+            // Act: request zero chars starting at index 2.
+            encoder.Encode("abc", 2, 0, output);
+
+            // Assert
+            // Reaching this point without the strict mock throwing is the success condition.
+        }
+
+        [Fact]
+        public void Encode_StringSubstring_AllCharsValid()
+        {
+            // Arrange: the window (start 4, count 2) selects "xy", skipping the '&' at index 3.
+            var encoder = new CustomUnicodeEncoderBase(UnicodeBlocks.All);
+            var output = new StringWriter();
+
+            // Act
+            encoder.Encode("abc&xyz", 4, 2, output);
+
+            // Assert: both chars are written unescaped.
+            string written = output.ToString();
+            Assert.Equal("xy", written);
+        }
+
+        [Fact]
+        public void Encode_StringSubstring_EntireString_AllCharsValid_ForwardDirectlyToOutput()
+        {
+            // Arrange: the strict mock accepts exactly one kind of call - Write("abc") with the
+            // whole string - so any other member call (e.g. char-by-char output) would throw.
+            var strictWriterMock = new Mock<TextWriter>(MockBehavior.Strict);
+            strictWriterMock.Setup(writer => writer.Write("abc")).Verifiable();
+            CustomUnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeBlocks.All);
+
+            // Act: encode the entire string (start 0, count 3).
+            encoder.Encode("abc", 0, 3, strictWriterMock.Object);
+
+            // Assert: the expected Write call actually happened.
+            strictWriterMock.Verify();
+        }
+
+        [Fact]
+        public void Encode_StringSubstring_AllCharsInvalid()
+        {
+            // Arrange: no blocks are allowed, so every char in the window must be escaped.
+            var encoder = new CustomUnicodeEncoderBase(UnicodeBlocks.None);
+            var output = new StringWriter();
+
+            // Act: the window (start 4, count 2) selects "xy".
+            encoder.Encode("abc&xyz", 4, 2, output);
+
+            // Assert
+            string written = output.ToString();
+            Assert.Equal("[U+0078][U+0079]", written);
+        }
+
+        [Fact]
+        public void Encode_StringSubstring_SomeCharsValid()
+        {
+            // Arrange: the window (start 2, count 3) selects "c&x" - only the '&' needs escaping.
+            var encoder = new CustomUnicodeEncoderBase(UnicodeBlocks.All);
+            var output = new StringWriter();
+
+            // Act
+            encoder.Encode("abc&xyz", 2, 3, output);
+
+            // Assert
+            string written = output.ToString();
+            Assert.Equal("c[U+0026]x", written);
+        }
+
+        [Fact]
+        public void Encode_StringSubstring_EntireString_SomeCharsValid()
+        {
+            // Arrange: the window covers the whole string, which contains one char ('&') needing escaping.
+            const string input = "abc&xyz";
+            var encoder = new CustomUnicodeEncoderBase(UnicodeBlocks.All);
+            var output = new StringWriter();
+
+            // Act
+            encoder.Encode(input, 0, input.Length, output);
+
+            // Assert: the text around the '&' is copied verbatim.
+            string written = output.ToString();
+            Assert.Equal("abc[U+0026]xyz", written);
+        }
+
+        // Returns true when codePoint lies in the UTF-16 surrogate range U+D800..U+DFFF (inclusive).
+        private static bool IsSurrogateCodePoint(int codePoint)
+        {
+            if (codePoint < 0xD800)
+            {
+                return false;
+            }
+
+            return codePoint <= 0xDFFF;
+        }
+
+        // Minimal ICodePointFilter that reports exactly the code points it was constructed with.
+        // NOTE(review): no test in this class references this type - confirm whether it is still needed.
+        private sealed class CustomCodePointFilter : ICodePointFilter
+        {
+            // The array handed to the constructor; stored by reference, not copied.
+            private readonly int[] _codePoints;
+
+            public CustomCodePointFilter(params int[] allowedCodePoints)
+            {
+                _codePoints = allowedCodePoints;
+            }
+
+            // Exposes the stored array as-is.
+            public IEnumerable<int> GetAllowedCodePoints()
+            {
+                IEnumerable<int> allowed = _codePoints;
+                return allowed;
+            }
+        }
+
+        // Test double for UnicodeEncoderBase: every scalar that needs escaping is written
+        // as "[U+XXXX]" (upper-case hex, at least four digits).
+        private sealed class CustomUnicodeEncoderBase : UnicodeEncoderBase
+        {
+            // The base class is told that one input char yields at most one output char.
+            // That is deliberately wrong (an escape such as "[U+0063]" is eight chars long),
+            // so these tests also cover the case where the size estimate is too small.
+            public CustomUnicodeEncoderBase(CodePointFilter filter)
+                : base(filter, maxOutputCharsPerInputChar: 1)
+            {
+            }
+
+            // Convenience overload: allow exactly the given blocks (none when called with no arguments).
+            public CustomUnicodeEncoderBase(params UnicodeBlock[] allowedBlocks)
+                : this(new CodePointFilter(allowedBlocks))
+            {
+            }
+
+            // Formats the scalar culture-invariantly and hands the escape text to the writer.
+            protected override void WriteEncodedScalar(ref Writer writer, uint value)
+            {
+                string escaped = String.Format(CultureInfo.InvariantCulture, "[U+{0:X4}]", value);
+                writer.Write(escaped);
+            }
+        }
+    }
+}
--- a/test/Microsoft.Framework.WebEncoders.Tests/UnicodeHelpersTests.cs
+++ b/test/Microsoft.Framework.WebEncoders.Tests/UnicodeHelpersTests.cs
@ -0,0 +1,184 @@
+// Copyright (c) Microsoft Open Technologies, Inc. All rights reserved.
+// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
+
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.IO;
+using System.Linq;
+using System.Reflection;
+using System.Text;
+using Xunit;
+
+namespace Microsoft.Framework.WebEncoders
+{
+    public unsafe class UnicodeHelpersTests
+    {
+        private const int UnicodeReplacementChar = '\uFFFD';
+
+        private static readonly UTF8Encoding _utf8EncodingThrowOnInvalidBytes = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false, throwOnInvalidBytes: true);
+
+        // Two consecutive calls to GetDefinedCharacterBitmap must yield the same array instance.
+        [Fact]
+        public void GetDefinedCharacterBitmap_ReturnsSingletonInstance()
+        {
+            // Act
+            uint[] firstCall = UnicodeHelpers.GetDefinedCharacterBitmap();
+            uint[] secondCall = UnicodeHelpers.GetDefinedCharacterBitmap();
+
+            // Assert (reference equality, not element equality)
+            Assert.Same(firstCall, secondCall);
+        }
+
+        // Reads the scalar value at the start of 'input'. Lone or mis-ordered surrogates
+        // are expected to come back as U+FFFD; a valid surrogate pair is expected to be
+        // combined into its supplementary code point.
+        [Theory]
+        [InlineData(1, "a", (int)'a')] // normal BMP char, end of string
+        [InlineData(2, "ab", (int)'a')] // normal BMP char, not end of string
+        [InlineData(3, "\uDFFF", UnicodeReplacementChar)] // trailing surrogate, end of string
+        [InlineData(4, "\uDFFFx", UnicodeReplacementChar)] // trailing surrogate, not end of string
+        [InlineData(5, "\uD800", UnicodeReplacementChar)] // leading surrogate, end of string
+        [InlineData(6, "\uD800x", UnicodeReplacementChar)] // leading surrogate, not end of string, followed by non-surrogate
+        [InlineData(7, "\uD800\uD800", UnicodeReplacementChar)] // leading surrogate, not end of string, followed by leading surrogate
+        [InlineData(8, "\uD800\uDFFF", 0x103FF)] // leading surrogate, not end of string, followed by trailing surrogate
+        public void GetScalarValueFromUtf16(int unused, string input, int expectedResult)
+        {
+            // 'unused' only gives each row a distinct identity: the xunit runner cannot
+            // tell the malformed-string rows apart without an extra identifier.
+
+            // Arrange: the first char is the end of the string only for one-char inputs.
+            bool firstCharIsLast = (input.Length == 1);
+
+            fixed (char* pInput = input)
+            {
+                // Act
+                var actualResult = UnicodeHelpers.GetScalarValueFromUtf16(pInput, endOfString: firstCharIsLast);
+
+                // Assert
+                Assert.Equal(expectedResult, actualResult);
+            }
+        }
+
+        // Exhaustively compares GetUtf8RepresentationForScalarValue with the framework's
+        // strict UTF8Encoding for every code point U+0000..U+10FFFF, surrogates excluded.
+        [Fact]
+        public void GetUtf8RepresentationForScalarValue()
+        {
+            for (int scalar = 0; scalar <= 0x10FFFF; scalar++)
+            {
+                // Surrogate code points are skipped: there is nothing to compare for them.
+                bool isSurrogate = (0xD800 <= scalar && scalar <= 0xDFFF);
+                if (isSurrogate)
+                {
+                    continue;
+                }
+
+                // Arrange
+                string asUtf16 = Char.ConvertFromUtf32(scalar);
+                byte[] expectedUtf8Bytes = _utf8EncodingThrowOnInvalidBytes.GetBytes(asUtf16);
+
+                // Act
+                // The packed result is unpacked starting from its least significant byte.
+                // One byte is always taken (so U+0000 yields a single 0x00), and unpacking
+                // stops as soon as no non-zero bits remain.
+                uint packed = (uint)UnicodeHelpers.GetUtf8RepresentationForScalarValue((uint)scalar);
+                List<byte> actualUtf8Bytes = new List<byte>(4);
+                for (;;)
+                {
+                    actualUtf8Bytes.Add((byte)packed);
+                    packed >>= 8;
+                    if (packed == 0)
+                    {
+                        break;
+                    }
+                }
+
+                // Assert
+                Assert.Equal(expectedUtf8Bytes, actualUtf8Bytes);
+            }
+        }
+
+        // Checks UnicodeHelpers.IsCharacterDefined for every UTF-16 code unit against the
+        // table built by ReadListOfDefinedCharacters. All mismatches are collected first
+        // so that a single failure message lists every disagreeing character.
+        [Fact]
+        public void IsCharacterDefined()
+        {
+            // Arrange
+            bool[] expectedTable = ReadListOfDefinedCharacters();
+            List<string> mismatches = new List<string>();
+
+            // Act
+            for (int codeUnit = Char.MinValue; codeUnit <= Char.MaxValue; codeUnit++)
+            {
+                bool expected = expectedTable[codeUnit];
+                bool actual = UnicodeHelpers.IsCharacterDefined((char)codeUnit);
+                if (expected == actual)
+                {
+                    continue;
+                }
+
+                mismatches.Add(String.Format(CultureInfo.InvariantCulture, "Character U+{0:X4}: expected = {1}, actual = {2}", codeUnit, expected, actual));
+            }
+
+            // Assert
+            Assert.True(mismatches.Count == 0, String.Join(Environment.NewLine, mismatches));
+        }
+
+        // Builds the expected "is this BMP character defined?" table from the embedded
+        // UnicodeData.txt resource.
+        //
+        // Returns: a 0x10000-element array indexed by UTF-16 code unit. An entry is true
+        // when the character is U+0020 SPACE, or when its general category (the third
+        // ';'-delimited field of its line) is in the allow-list below.
+        //
+        // Fails the calling test if the resource cannot be found, or if any allow-listed
+        // category never occurs in the data (a guard against typos in the list).
+        //
+        // NOTE(review): each line contributes only the single code point in its first field.
+        // UnicodeData.txt describes some large ranges with a "<..., First>" / "<..., Last>"
+        // pair of lines (see UAX #44); code points strictly between such a pair stay false
+        // here. Confirm this matches what IsCharacterDefined is intended to report.
+        private static bool[] ReadListOfDefinedCharacters()
+        {
+            HashSet<string> allowedCategories = new HashSet<string>
+            {
+                // Letters
+                "Lu", "Ll", "Lt", "Lm", "Lo",
+
+                // Marks
+                "Mn", "Mc", "Me",
+
+                // Numbers
+                "Nd", "Nl", "No",
+
+                // Punctuation
+                "Pc", "Pd", "Ps", "Pe", "Pi", "Pf", "Po",
+
+                // Symbols
+                "Sm", "Sc", "Sk", "So",
+
+                // Separators
+                // With the exception of U+0020 SPACE (special-cased below), these aren't allowed
+
+                // Other
+                // We only allow one category of 'other' characters
+                "Cf"
+            };
+
+            HashSet<string> seenCategories = new HashSet<string>();
+
+            bool[] retVal = new bool[0x10000];
+
+            // GetManifestResourceStream returns null when the resource name is unknown;
+            // assert on that explicitly so the failure names the real problem.
+            Stream resourceStream = typeof(UnicodeHelpersTests).GetTypeInfo().Assembly.GetManifestResourceStream("../../unicode/UnicodeData.txt");
+            Assert.NotNull(resourceStream);
+
+            // Disposing the reader also disposes the underlying resource stream.
+            string[] allLines;
+            using (StreamReader reader = new StreamReader(resourceStream))
+            {
+                allLines = reader.ReadAllLines();
+            }
+
+            foreach (string line in allLines)
+            {
+                // Line format: <code point (hex)>;<name>;<general category>;...
+                string[] splitLine = line.Split(';');
+                uint codePoint = UInt32.Parse(splitLine[0], NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture);
+                if (codePoint >= retVal.Length)
+                {
+                    continue; // don't care about supplementary chars
+                }
+
+                if (codePoint == (uint)' ')
+                {
+                    retVal[codePoint] = true; // we allow U+0020 SPACE as our only valid Zs (whitespace) char
+                }
+                else
+                {
+                    string category = splitLine[2];
+                    if (allowedCategories.Contains(category))
+                    {
+                        retVal[codePoint] = true; // chars in this category are allowable
+                        seenCategories.Add(category);
+                    }
+                }
+            }
+
+            // Finally, we need to make sure we've seen every category which contains
+            // allowed characters. This provides extra defense against having a typo
+            // in the list of categories.
+            Assert.Equal(allowedCategories.OrderBy(c => c), seenCategories.OrderBy(c => c));
+
+            return retVal;
+        }
+    }
+}
--- a/test/Microsoft.Framework.WebEncoders.Tests/UrlEncoderTests.cs
+++ b/test/Microsoft.Framework.WebEncoders.Tests/UrlEncoderTests.cs
@ -0,0 +1,302 @@
+// Copyright (c) Microsoft Open Technologies, Inc. All rights reserved.
+// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
+
+using System;
+using System.Globalization;
+using System.IO;
+using System.Linq;
+using System.Text;
+using Xunit;
+
+namespace Microsoft.Framework.WebEncoders
+{
+    public class UrlEncoderTests
+    {
+        // Strict UTF-8 encoding (no BOM, throws on invalid input) used to compute
+        // known-good byte sequences. Never reassigned, hence readonly.
+        private static readonly UTF8Encoding _utf8EncodingThrowOnInvalidBytes = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false, throwOnInvalidBytes: true);
+
+        // Characters the filter allows ('a', 'b', 'd') pass through unchanged; 'c' is not
+        // allowed, and '\0', '&' and U+FFFF are percent-encoded even though the filter lists them.
+        [Fact]
+        public void Ctor_WithCodePointFilter()
+        {
+            // Arrange
+            var filter = new CodePointFilter(UnicodeBlocks.None)
+                .AllowChars("ab")
+                .AllowChars('\0', '&', '\uFFFF', 'd');
+            UrlEncoder encoder = new UrlEncoder(filter);
+
+            // Each row is { input, expected output }.
+            string[][] cases =
+            {
+                new[] { "a", "a" },
+                new[] { "b", "b" },
+                new[] { "c", "%63" },
+                new[] { "d", "d" },
+                new[] { "\0", "%00" }, // we still always encode control chars
+                new[] { "&", "%26" }, // we still always encode HTML-special chars
+                new[] { "\uFFFF", "%EF%BF%BF" } // we still always encode non-chars and other forbidden chars
+            };
+
+            // Act & assert (rows are checked in order; the first mismatch fails the test)
+            foreach (string[] testCase in cases)
+            {
+                string input = testCase[0];
+                string expected = testCase[1];
+                Assert.Equal(expected, encoder.UrlEncode(input));
+            }
+        }
+
+        // Only the two blocks given to the constructor pass through unencoded;
+        // a Basic Latin letter outside them is percent-encoded.
+        [Fact]
+        public void Ctor_WithUnicodeBlocks()
+        {
+            // Arrange
+            UrlEncoder encoder = new UrlEncoder(UnicodeBlocks.Latin1Supplement, UnicodeBlocks.MiscellaneousSymbols);
+
+            // Act & assert
+            string basicLatinLetter = encoder.UrlEncode("a");
+            Assert.Equal("%61", basicLatinLetter);
+
+            string latin1Letter = encoder.UrlEncode("\u00E9" /* LATIN SMALL LETTER E WITH ACUTE */);
+            Assert.Equal("\u00E9", latin1Letter);
+
+            string miscSymbol = encoder.UrlEncode("\u2601" /* CLOUD */);
+            Assert.Equal("\u2601", miscSymbol);
+        }
+
+        // A parameterless encoder leaves a Basic Latin letter alone and percent-encodes
+        // (as UTF-8) characters from other blocks.
+        [Fact]
+        public void Ctor_WithNoParameters_DefaultsToBasicLatin()
+        {
+            // Arrange
+            UrlEncoder encoder = new UrlEncoder();
+
+            // Act & assert
+            string basicLatinLetter = encoder.UrlEncode("a");
+            Assert.Equal("a", basicLatinLetter);
+
+            string latin1Letter = encoder.UrlEncode("\u00E9" /* LATIN SMALL LETTER E WITH ACUTE */);
+            Assert.Equal("%C3%A9", latin1Letter);
+
+            string miscSymbol = encoder.UrlEncode("\u2601" /* CLOUD */);
+            Assert.Equal("%E2%98%81", miscSymbol);
+        }
+
+        // UrlEncoder.Default must produce the same output as an encoder constructed with
+        // UnicodeBlocks.BasicLatin for every single-char input that is not a surrogate.
+        [Fact]
+        public void Default_EquivalentToBasicLatin()
+        {
+            // Arrange
+            UrlEncoder controlEncoder = new UrlEncoder(UnicodeBlocks.BasicLatin);
+            UrlEncoder testEncoder = UrlEncoder.Default;
+
+            // Act & assert
+            for (int codeUnit = 0; codeUnit <= Char.MaxValue; codeUnit++)
+            {
+                if (IsSurrogateCodePoint(codeUnit))
+                {
+                    continue; // lone surrogates are not compared here
+                }
+
+                string input = ((char)codeUnit).ToString();
+                string expected = controlEncoder.UrlEncode(input);
+                string actual = testEncoder.UrlEncode(input);
+                Assert.Equal(expected, actual);
+            }
+        }
+
+        // Reading UrlEncoder.Default twice must yield the same object.
+        [Fact]
+        public void Default_ReturnsSingletonInstance()
+        {
+            // Act
+            UrlEncoder firstRead = UrlEncoder.Default;
+            UrlEncoder secondRead = UrlEncoder.Default;
+
+            // Assert (reference equality)
+            Assert.Same(firstRead, secondRead);
+        }
+
+        // With every Unicode block allowed, the encoder must still percent-encode anything
+        // outside the test's allow-list. The expected value for each input is computed
+        // independently here: lone surrogates become the encoded U+FFFD, allow-listed
+        // characters pass through, and everything else is the percent-encoded UTF-8 form.
+        // Supplementary (astral) code points are always expected to be encoded.
+        [Fact]
+        public void UrlEncode_AllRangesAllowed_StillEncodesForbiddenChars()
+        {
+            // Arrange
+            UrlEncoder encoder = new UrlEncoder(UnicodeBlocks.All);
+
+            // Act & assert - BMP chars
+            for (int i = 0; i <= 0xFFFF; i++)
+            {
+                string input = new String((char)i, 1);
+                string expected;
+                if (IsSurrogateCodePoint(i))
+                {
+                    expected = "%EF%BF%BD"; // unpaired surrogate -> Unicode replacement char
+                }
+                else
+                {
+                    bool mustEncode = true;
+
+                    // RFC 3987, Sec. 2.2 gives the list of allowed chars
+                    // (We allow 'ipchar' except for "'", "&", "+", "%", and "=")
+                    if (('a' <= i && i <= 'z') || ('A' <= i && i <= 'Z') || ('0' <= i && i <= '9'))
+                    {
+                        mustEncode = false; // ALPHA / DIGIT
+                    }
+                    else if ((0x00A0 <= i && i <= 0xD7FF) || (0xF900 <= i && i <= 0xFDCF) || (0xFDF0 <= i && i <= 0xFFEF))
+                    {
+                        // 'ucschar': within these ranges only defined characters may stay unencoded
+                        mustEncode = !UnicodeHelpers.IsCharacterDefined((char)i);
+                    }
+                    else
+                    {
+                        switch (i)
+                        {
+                            // iunreserved
+                            case '-':
+                            case '.':
+                            case '_':
+                            case '~':
+
+                            // ipchar
+                            case ':':
+                            case '@':
+
+                            // sub-delims
+                            case '!':
+                            case '$':
+                            case '(':
+                            case ')':
+                            case '*':
+                            case ',':
+                            case ';':
+                                mustEncode = false;
+                                break;
+                        }
+                    }
+
+                    if (mustEncode)
+                    {
+                        expected = GetKnownGoodPercentEncodedValue(i);
+                    }
+                    else
+                    {
+                        expected = input; // no encoding
+                    }
+                }
+
+                string retVal = encoder.UrlEncode(input);
+                Assert.Equal(expected, retVal);
+            }
+
+            // Act & assert - astral chars
+            for (int i = 0x10000; i <= 0x10FFFF; i++)
+            {
+                string input = Char.ConvertFromUtf32(i);
+                string expected = GetKnownGoodPercentEncodedValue(i);
+                string retVal = encoder.UrlEncode(input);
+                Assert.Equal(expected, retVal);
+            }
+        }
+
+        // Every unpaired or mis-ordered surrogate in the input must be emitted as the
+        // percent-encoded U+FFFD; a well-formed pair is encoded as its own code point.
+        [Fact]
+        public void UrlEncode_BadSurrogates_ReturnsUnicodeReplacementChar()
+        {
+            // Arrange
+            // "a<unpaired leading>b<unpaired trailing>c<trailing before leading>d<unpaired trailing><valid>e<high at end of string>"
+            const string malformedInput = "a\uD800b\uDFFFc\uDFFF\uD800d\uDFFF\uD800\uDFFFe\uD800";
+
+            // 'D800' 'DFFF' was preserved since it's valid
+            const string expectedOutput = "a%EF%BF%BDb%EF%BF%BDc%EF%BF%BD%EF%BF%BDd%EF%BF%BD%F0%90%8F%BFe%EF%BF%BD";
+
+            UrlEncoder allowAllEncoder = new UrlEncoder(UnicodeBlocks.All); // allow all codepoints
+
+            // Act
+            string actualOutput = allowAllEncoder.UrlEncode(malformedInput);
+
+            // Assert
+            Assert.Equal(expectedOutput, actualOutput);
+        }
+
+        // Encoding the empty string yields the empty string.
+        [Fact]
+        public void UrlEncode_EmptyStringInput_ReturnsEmptyString()
+        {
+            // Arrange
+            UrlEncoder encoder = new UrlEncoder();
+
+            // Act
+            string encoded = encoder.UrlEncode("");
+
+            // Assert
+            Assert.Equal("", encoded);
+        }
+
+        // When nothing needs encoding, the very same string object must be returned
+        // (checked by reference, not by value).
+        [Fact]
+        public void UrlEncode_InputDoesNotRequireEncoding_ReturnsOriginalStringInstance()
+        {
+            // Arrange
+            string input = "Hello,there!";
+            UrlEncoder encoder = new UrlEncoder();
+
+            // Act
+            string encoded = encoder.UrlEncode(input);
+
+            // Assert
+            Assert.Same(input, encoded);
+        }
+
+        // A null input is passed through as null rather than throwing.
+        [Fact]
+        public void UrlEncode_NullInput_ReturnsNull()
+        {
+            // Arrange
+            UrlEncoder encoder = new UrlEncoder();
+
+            // Act
+            string encoded = encoder.UrlEncode(null);
+
+            // Assert
+            Assert.Null(encoded);
+        }
+
+        // The character that needs encoding is the first one in the input.
+        [Fact]
+        public void UrlEncode_WithCharsRequiringEncodingAtBeginning()
+        {
+            // Arrange
+            UrlEncoder encoder = new UrlEncoder();
+
+            // Act
+            string encoded = encoder.UrlEncode("&Hello,there!");
+
+            // Assert
+            Assert.Equal(@"%26Hello,there!", encoded);
+        }
+
+        // The character that needs encoding is the last one in the input.
+        [Fact]
+        public void UrlEncode_WithCharsRequiringEncodingAtEnd()
+        {
+            // Arrange
+            UrlEncoder encoder = new UrlEncoder();
+
+            // Act
+            string encoded = encoder.UrlEncode("Hello,there!&");
+
+            // Assert
+            Assert.Equal(@"Hello,there!%26", encoded);
+        }
+
+        // Two adjacent characters in the middle of the input (space and '&') need encoding.
+        [Fact]
+        public void UrlEncode_WithCharsRequiringEncodingInMiddle()
+        {
+            // Arrange
+            UrlEncoder encoder = new UrlEncoder();
+
+            // Act
+            string encoded = encoder.UrlEncode("Hello, &there!");
+
+            // Assert
+            Assert.Equal(@"Hello,%20%26there!", encoded);
+        }
+
+        // Characters that need encoding are separated by runs that do not.
+        [Fact]
+        public void UrlEncode_WithCharsRequiringEncodingInterspersed()
+        {
+            // Arrange
+            UrlEncoder encoder = new UrlEncoder();
+
+            // Act
+            string encoded = encoder.UrlEncode("Hello, <there>!");
+
+            // Assert
+            Assert.Equal(@"Hello,%20%3Cthere%3E!", encoded);
+        }
+
+        // Encodes a window of a char[] (5 chars starting at index 3, i.e. "lo+wo")
+        // into a TextWriter and checks what was written.
+        [Fact]
+        public void UrlEncode_CharArray()
+        {
+            // Arrange
+            char[] source = "Hello+world!".ToCharArray();
+            StringWriter sink = new StringWriter();
+            UrlEncoder encoder = new UrlEncoder();
+
+            // Act
+            encoder.UrlEncode(source, 3, 5, sink);
+
+            // Assert
+            string written = sink.ToString();
+            Assert.Equal("lo%2Bwo", written);
+        }
+
+        // Encodes a window of a string (5 chars starting at index 3, i.e. "lo+wo")
+        // into a TextWriter and checks what was written.
+        [Fact]
+        public void UrlEncode_StringSubstring()
+        {
+            // Arrange
+            string source = "Hello+world!";
+            StringWriter sink = new StringWriter();
+            UrlEncoder encoder = new UrlEncoder();
+
+            // Act
+            encoder.UrlEncode(source, 3, 5, sink);
+
+            // Assert
+            string written = sink.ToString();
+            Assert.Equal("lo%2Bwo", written);
+        }
+
+        // Defense-in-depth check: URL-encoder output must contain nothing the HTML encoder
+        // would change, so HTML-encoding it afterwards has to be a no-op for every
+        // non-surrogate code point.
+        [Fact]
+        public void UrlEncode_DoesNotOutputHtmlSensitiveCharacters()
+        {
+            // Per the design document, we provide additional defense-in-depth
+            // by never emitting HTML-sensitive characters unescaped.
+
+            // Arrange
+            UrlEncoder urlEncoder = new UrlEncoder(UnicodeBlocks.All);
+            HtmlEncoder htmlEncoder = new HtmlEncoder(UnicodeBlocks.All);
+
+            // Act & assert
+            for (int codePoint = 0; codePoint <= 0x10FFFF; codePoint++)
+            {
+                if (!IsSurrogateCodePoint(codePoint))
+                {
+                    string input = Char.ConvertFromUtf32(codePoint);
+                    string urlEncoded = urlEncoder.UrlEncode(input);
+                    string urlThenHtmlEncoded = htmlEncoder.HtmlEncode(urlEncoded);
+
+                    // should have contained no HTML-sensitive characters
+                    Assert.Equal(urlEncoded, urlThenHtmlEncoded);
+                }
+
+                // surrogates don't matter here, so they are skipped
+            }
+        }
+
+        // Reference implementation of percent-encoding for a single code point:
+        // code point -> UTF-16 string -> strict UTF-8 bytes -> "%XX" per byte (uppercase hex).
+        // Relies on Char.ConvertFromUtf32, so 'codePoint' must be a value that method accepts.
+        private static string GetKnownGoodPercentEncodedValue(int codePoint)
+        {
+            string asUtf16 = Char.ConvertFromUtf32(codePoint);
+            byte[] utf8Bytes = _utf8EncodingThrowOnInvalidBytes.GetBytes(asUtf16);
+
+            // Each byte becomes exactly three chars ('%' plus two hex digits).
+            StringBuilder percentEncoded = new StringBuilder(utf8Bytes.Length * 3);
+            foreach (byte utf8Byte in utf8Bytes)
+            {
+                percentEncoded.AppendFormat(CultureInfo.InvariantCulture, "%{0:X2}", utf8Byte);
+            }
+
+            return percentEncoded.ToString();
+        }
+
+        // Returns true when 'codePoint' falls inside the UTF-16 surrogate range
+        // U+D800..U+DFFF (both bounds inclusive); false for every other value.
+        private static bool IsSurrogateCodePoint(int codePoint)
+        {
+            bool belowRange = codePoint < 0xD800;
+            bool aboveRange = codePoint > 0xDFFF;
+            return !(belowRange || aboveRange);
+        }
+    }
+}
--- a/test/Microsoft.Framework.WebEncoders.Tests/project.json
+++ b/test/Microsoft.Framework.WebEncoders.Tests/project.json
@ -0,0 +1,18 @@
+{
+    "dependencies": {
+        "Microsoft.Framework.WebEncoders": "1.0.0-*",
+        "Moq": "4.2.1312.1622",
+        "Newtonsoft.Json": "6.0.6",
+        "xunit.runner.kre": "1.0.0-*"
+    },
+    "commands": {
+        "test": "xunit.runner.kre"
+    },
+    "compilationOptions": {
+        "allowUnsafe": true
+    },
+    "frameworks": {
+        "aspnet50": { }
+    },
+    "resources": "..\\..\\unicode\\UnicodeData.txt"
+}
--- a/unicode/Blocks.txt
+++ b/unicode/Blocks.txt
@ -0,0 +1,283 @@
+# Blocks-7.0.0.txt
+# Date: 2014-04-03, 23:23:00 GMT [RP, KW]
+#
+# Unicode Character Database
+# Copyright (c) 1991-2014 Unicode, Inc.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+# For documentation, see http://www.unicode.org/reports/tr44/
+#
+# Note:   The casing of block names is not normative.
+#         For example, "Basic Latin" and "BASIC LATIN" are equivalent.
+#
+# Format:
+# Start Code..End Code; Block Name
+
+# ================================================
+
+# Note:   When comparing block names, casing, whitespace, hyphens,
+#         and underbars are ignored.
+#         For example, "Latin Extended-A" and "latin extended a" are equivalent.
+#         For more information on the comparison of property values, 
+#            see UAX #44: http://www.unicode.org/reports/tr44/
+#
+#  All code points not explicitly listed for Block
+#  have the value No_Block.
+
+# Property:	Block
+#
+# @missing: 0000..10FFFF; No_Block
+
+0000..007F; Basic Latin
+0080..00FF; Latin-1 Supplement
+0100..017F; Latin Extended-A
+0180..024F; Latin Extended-B
+0250..02AF; IPA Extensions
+02B0..02FF; Spacing Modifier Letters
+0300..036F; Combining Diacritical Marks
+0370..03FF; Greek and Coptic
+0400..04FF; Cyrillic
+0500..052F; Cyrillic Supplement
+0530..058F; Armenian
+0590..05FF; Hebrew
+0600..06FF; Arabic
+0700..074F; Syriac
+0750..077F; Arabic Supplement
+0780..07BF; Thaana
+07C0..07FF; NKo
+0800..083F; Samaritan
+0840..085F; Mandaic
+08A0..08FF; Arabic Extended-A
+0900..097F; Devanagari
+0980..09FF; Bengali
+0A00..0A7F; Gurmukhi
+0A80..0AFF; Gujarati
+0B00..0B7F; Oriya
+0B80..0BFF; Tamil
+0C00..0C7F; Telugu
+0C80..0CFF; Kannada
+0D00..0D7F; Malayalam
+0D80..0DFF; Sinhala
+0E00..0E7F; Thai
+0E80..0EFF; Lao
+0F00..0FFF; Tibetan
+1000..109F; Myanmar
+10A0..10FF; Georgian
+1100..11FF; Hangul Jamo
+1200..137F; Ethiopic
+1380..139F; Ethiopic Supplement
+13A0..13FF; Cherokee
+1400..167F; Unified Canadian Aboriginal Syllabics
+1680..169F; Ogham
+16A0..16FF; Runic
+1700..171F; Tagalog
+1720..173F; Hanunoo
+1740..175F; Buhid
+1760..177F; Tagbanwa
+1780..17FF; Khmer
+1800..18AF; Mongolian
+18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
+1900..194F; Limbu
+1950..197F; Tai Le
+1980..19DF; New Tai Lue
+19E0..19FF; Khmer Symbols
+1A00..1A1F; Buginese
+1A20..1AAF; Tai Tham
+1AB0..1AFF; Combining Diacritical Marks Extended
+1B00..1B7F; Balinese
+1B80..1BBF; Sundanese
+1BC0..1BFF; Batak
+1C00..1C4F; Lepcha
+1C50..1C7F; Ol Chiki
+1CC0..1CCF; Sundanese Supplement
+1CD0..1CFF; Vedic Extensions
+1D00..1D7F; Phonetic Extensions
+1D80..1DBF; Phonetic Extensions Supplement
+1DC0..1DFF; Combining Diacritical Marks Supplement
+1E00..1EFF; Latin Extended Additional
+1F00..1FFF; Greek Extended
+2000..206F; General Punctuation
+2070..209F; Superscripts and Subscripts
+20A0..20CF; Currency Symbols
+20D0..20FF; Combining Diacritical Marks for Symbols
+2100..214F; Letterlike Symbols
+2150..218F; Number Forms
+2190..21FF; Arrows
+2200..22FF; Mathematical Operators
+2300..23FF; Miscellaneous Technical
+2400..243F; Control Pictures
+2440..245F; Optical Character Recognition
+2460..24FF; Enclosed Alphanumerics
+2500..257F; Box Drawing
+2580..259F; Block Elements
+25A0..25FF; Geometric Shapes
+2600..26FF; Miscellaneous Symbols
+2700..27BF; Dingbats
+27C0..27EF; Miscellaneous Mathematical Symbols-A
+27F0..27FF; Supplemental Arrows-A
+2800..28FF; Braille Patterns
+2900..297F; Supplemental Arrows-B
+2980..29FF; Miscellaneous Mathematical Symbols-B
+2A00..2AFF; Supplemental Mathematical Operators
+2B00..2BFF; Miscellaneous Symbols and Arrows
+2C00..2C5F; Glagolitic
+2C60..2C7F; Latin Extended-C
+2C80..2CFF; Coptic
+2D00..2D2F; Georgian Supplement
+2D30..2D7F; Tifinagh
+2D80..2DDF; Ethiopic Extended
+2DE0..2DFF; Cyrillic Extended-A
+2E00..2E7F; Supplemental Punctuation
+2E80..2EFF; CJK Radicals Supplement
+2F00..2FDF; Kangxi Radicals
+2FF0..2FFF; Ideographic Description Characters
+3000..303F; CJK Symbols and Punctuation
+3040..309F; Hiragana
+30A0..30FF; Katakana
+3100..312F; Bopomofo
+3130..318F; Hangul Compatibility Jamo
+3190..319F; Kanbun
+31A0..31BF; Bopomofo Extended
+31C0..31EF; CJK Strokes
+31F0..31FF; Katakana Phonetic Extensions
+3200..32FF; Enclosed CJK Letters and Months
+3300..33FF; CJK Compatibility
+3400..4DBF; CJK Unified Ideographs Extension A
+4DC0..4DFF; Yijing Hexagram Symbols
+4E00..9FFF; CJK Unified Ideographs
+A000..A48F; Yi Syllables
+A490..A4CF; Yi Radicals
+A4D0..A4FF; Lisu
+A500..A63F; Vai
+A640..A69F; Cyrillic Extended-B
+A6A0..A6FF; Bamum
+A700..A71F; Modifier Tone Letters
+A720..A7FF; Latin Extended-D
+A800..A82F; Syloti Nagri
+A830..A83F; Common Indic Number Forms
+A840..A87F; Phags-pa
+A880..A8DF; Saurashtra
+A8E0..A8FF; Devanagari Extended
+A900..A92F; Kayah Li
+A930..A95F; Rejang
+A960..A97F; Hangul Jamo Extended-A
+A980..A9DF; Javanese
+A9E0..A9FF; Myanmar Extended-B
+AA00..AA5F; Cham
+AA60..AA7F; Myanmar Extended-A
+AA80..AADF; Tai Viet
+AAE0..AAFF; Meetei Mayek Extensions
+AB00..AB2F; Ethiopic Extended-A
+AB30..AB6F; Latin Extended-E
+ABC0..ABFF; Meetei Mayek
+AC00..D7AF; Hangul Syllables
+D7B0..D7FF; Hangul Jamo Extended-B
+D800..DB7F; High Surrogates
+DB80..DBFF; High Private Use Surrogates
+DC00..DFFF; Low Surrogates
+E000..F8FF; Private Use Area
+F900..FAFF; CJK Compatibility Ideographs
+FB00..FB4F; Alphabetic Presentation Forms
+FB50..FDFF; Arabic Presentation Forms-A
+FE00..FE0F; Variation Selectors
+FE10..FE1F; Vertical Forms
+FE20..FE2F; Combining Half Marks
+FE30..FE4F; CJK Compatibility Forms
+FE50..FE6F; Small Form Variants
+FE70..FEFF; Arabic Presentation Forms-B
+FF00..FFEF; Halfwidth and Fullwidth Forms
+FFF0..FFFF; Specials
+10000..1007F; Linear B Syllabary
+10080..100FF; Linear B Ideograms
+10100..1013F; Aegean Numbers
+10140..1018F; Ancient Greek Numbers
+10190..101CF; Ancient Symbols
+101D0..101FF; Phaistos Disc
+10280..1029F; Lycian
+102A0..102DF; Carian
+102E0..102FF; Coptic Epact Numbers
+10300..1032F; Old Italic
+10330..1034F; Gothic
+10350..1037F; Old Permic
+10380..1039F; Ugaritic
+103A0..103DF; Old Persian
+10400..1044F; Deseret
+10450..1047F; Shavian
+10480..104AF; Osmanya
+10500..1052F; Elbasan
+10530..1056F; Caucasian Albanian
+10600..1077F; Linear A
+10800..1083F; Cypriot Syllabary
+10840..1085F; Imperial Aramaic
+10860..1087F; Palmyrene
+10880..108AF; Nabataean
+10900..1091F; Phoenician
+10920..1093F; Lydian
+10980..1099F; Meroitic Hieroglyphs
+109A0..109FF; Meroitic Cursive
+10A00..10A5F; Kharoshthi
+10A60..10A7F; Old South Arabian
+10A80..10A9F; Old North Arabian
+10AC0..10AFF; Manichaean
+10B00..10B3F; Avestan
+10B40..10B5F; Inscriptional Parthian
+10B60..10B7F; Inscriptional Pahlavi
+10B80..10BAF; Psalter Pahlavi
+10C00..10C4F; Old Turkic
+10E60..10E7F; Rumi Numeral Symbols
+11000..1107F; Brahmi
+11080..110CF; Kaithi
+110D0..110FF; Sora Sompeng
+11100..1114F; Chakma
+11150..1117F; Mahajani
+11180..111DF; Sharada
+111E0..111FF; Sinhala Archaic Numbers
+11200..1124F; Khojki
+112B0..112FF; Khudawadi
+11300..1137F; Grantha
+11480..114DF; Tirhuta
+11580..115FF; Siddham
+11600..1165F; Modi
+11680..116CF; Takri
+118A0..118FF; Warang Citi
+11AC0..11AFF; Pau Cin Hau
+12000..123FF; Cuneiform
+12400..1247F; Cuneiform Numbers and Punctuation
+13000..1342F; Egyptian Hieroglyphs
+16800..16A3F; Bamum Supplement
+16A40..16A6F; Mro
+16AD0..16AFF; Bassa Vah
+16B00..16B8F; Pahawh Hmong
+16F00..16F9F; Miao
+1B000..1B0FF; Kana Supplement
+1BC00..1BC9F; Duployan
+1BCA0..1BCAF; Shorthand Format Controls
+1D000..1D0FF; Byzantine Musical Symbols
+1D100..1D1FF; Musical Symbols
+1D200..1D24F; Ancient Greek Musical Notation
+1D300..1D35F; Tai Xuan Jing Symbols
+1D360..1D37F; Counting Rod Numerals
+1D400..1D7FF; Mathematical Alphanumeric Symbols
+1E800..1E8DF; Mende Kikakui
+1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
+1F000..1F02F; Mahjong Tiles
+1F030..1F09F; Domino Tiles
+1F0A0..1F0FF; Playing Cards
+1F100..1F1FF; Enclosed Alphanumeric Supplement
+1F200..1F2FF; Enclosed Ideographic Supplement
+1F300..1F5FF; Miscellaneous Symbols and Pictographs
+1F600..1F64F; Emoticons
+1F650..1F67F; Ornamental Dingbats
+1F680..1F6FF; Transport and Map Symbols
+1F700..1F77F; Alchemical Symbols
+1F780..1F7FF; Geometric Shapes Extended
+1F800..1F8FF; Supplemental Arrows-C
+20000..2A6DF; CJK Unified Ideographs Extension B
+2A700..2B73F; CJK Unified Ideographs Extension C
+2B740..2B81F; CJK Unified Ideographs Extension D
+2F800..2FA1F; CJK Compatibility Ideographs Supplement
+E0000..E007F; Tags
+E0100..E01EF; Variation Selectors Supplement
+F0000..FFFFF; Supplementary Private Use Area-A
+100000..10FFFF; Supplementary Private Use Area-B
+
+# EOF
--- a/unicode/Generators/DefinedCharListGenerator/App.config
+++ b/unicode/Generators/DefinedCharListGenerator/App.config
@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="utf-8" ?>
+<configuration>
+    <startup> 
+        <supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.5.1" />
+    </startup>
+</configuration>
--- a/unicode/Generators/DefinedCharListGenerator/DefinedCharListGenerator.csproj
+++ b/unicode/Generators/DefinedCharListGenerator/DefinedCharListGenerator.csproj
@ -0,0 +1,61 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
+  <PropertyGroup>
+    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+    <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+    <ProjectGuid>{0E87CEC9-46CE-4B6B-A613-93AA773C10A4}</ProjectGuid>
+    <OutputType>Exe</OutputType>
+    <AppDesignerFolder>Properties</AppDesignerFolder>
+    <RootNamespace>DefinedCharListGenerator</RootNamespace>
+    <AssemblyName>DefinedCharListGenerator</AssemblyName>
+    <TargetFrameworkVersion>v4.5.1</TargetFrameworkVersion>
+    <FileAlignment>512</FileAlignment>
+    <AutoGenerateBindingRedirects>true</AutoGenerateBindingRedirects>
+  </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+    <PlatformTarget>AnyCPU</PlatformTarget>
+    <DebugSymbols>true</DebugSymbols>
+    <DebugType>full</DebugType>
+    <Optimize>false</Optimize>
+    <OutputPath>bin\Debug\</OutputPath>
+    <DefineConstants>DEBUG;TRACE</DefineConstants>
+    <ErrorReport>prompt</ErrorReport>
+    <WarningLevel>4</WarningLevel>
+  </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
+    <PlatformTarget>AnyCPU</PlatformTarget>
+    <DebugType>pdbonly</DebugType>
+    <Optimize>true</Optimize>
+    <OutputPath>bin\Release\</OutputPath>
+    <DefineConstants>TRACE</DefineConstants>
+    <ErrorReport>prompt</ErrorReport>
+    <WarningLevel>4</WarningLevel>
+  </PropertyGroup>
+  <ItemGroup>
+    <Reference Include="System" />
+    <Reference Include="System.Core" />
+    <Reference Include="Microsoft.CSharp" />
+  </ItemGroup>
+  <ItemGroup>
+    <Compile Include="Program.cs" />
+    <Compile Include="Properties\AssemblyInfo.cs" />
+  </ItemGroup>
+  <ItemGroup>
+    <None Include="App.config" />
+  </ItemGroup>
+  <ItemGroup>
+    <Content Include="..\..\UnicodeData.txt">
+      <Link>UnicodeData.txt</Link>
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </Content>
+  </ItemGroup>
+  <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
+  <!-- To modify your build process, add your task inside one of the targets below and uncomment it. 
+       Other similar extension points exist, see Microsoft.Common.targets.
+  <Target Name="BeforeBuild">
+  </Target>
+  <Target Name="AfterBuild">
+  </Target>
+  -->
+</Project>
--- a/unicode/Generators/DefinedCharListGenerator/Program.cs
+++ b/unicode/Generators/DefinedCharListGenerator/Program.cs
@ -0,0 +1,107 @@
+using System;
+using System.Diagnostics;
+using System.Globalization;
+using System.IO;
+
+namespace DefinedCharListGenerator
+{
+    /// <summary>
+    /// This program outputs the 'unicode-defined-chars.bin' bitmap file.
+    /// </summary>
+    /// <remarks>
+    /// Reads 'UnicodeData.txt' from the current directory, marks every code point
+    /// whose general category is allowed, and writes one bit per BMP code point
+    /// to 'unicode-defined-chars.bin' in the current directory.
+    /// </remarks>
+    class Program
+    {
+        // Unicode range is U+0000 .. U+10FFFF
+        private const uint MaxUnicodeChar = 0x10FFFF;
+
+        // We're only concerned about the BMP (U+0000 .. U+FFFF) for now.
+        private const int BmpCharCount = 0x10000;
+
+        // UnicodeData.txt does not list every code point of certain large blocks
+        // (e.g. CJK ideographs, Hangul syllables). Instead it lists two entries
+        // whose names end with these suffixes, and every code point between the
+        // two entries (inclusive) shares the properties of the end points.
+        private const string RangeFirstSuffix = ", First>";
+        private const string RangeLastSuffix = ", Last>";
+
+        /// <summary>
+        /// Entry point: parses 'UnicodeData.txt' and writes 'unicode-defined-chars.bin'.
+        /// </summary>
+        /// <param name="args">Command-line arguments (unused).</param>
+        /// <exception cref="InvalidDataException">
+        /// A "First" / "Last" range marker in the input has no matching partner.
+        /// </exception>
+        static void Main(string[] args)
+        {
+            // The input file should be UnicodeData.txt from the UCD corresponding to the
+            // version of the Unicode spec we're consuming.
+            // More info: http://www.unicode.org/reports/tr44/tr44-14.html#UCD_Files
+            // Latest UnicodeData.txt: http://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
+
+            bool[] definedChars = new bool[MaxUnicodeChar + 1];
+
+            // Read all defined characters from the input file.
+            string[] allLines = File.ReadAllLines("UnicodeData.txt");
+
+            // Start of the range currently being read, or null when not inside a range.
+            uint? rangeFirst = null;
+
+            // Each line is a semicolon-delimited list of information:
+            // <value>;<name>;<category>;...
+            foreach (string line in allLines)
+            {
+                if (line.Length == 0)
+                {
+                    // Tolerate blank lines rather than failing in uint.Parse.
+                    continue;
+                }
+
+                string[] splitLine = line.Split(new char[] { ';' }, 4);
+                uint codepoint = uint.Parse(splitLine[0], NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture);
+                string name = splitLine[1];
+                string category = splitLine[2];
+                Debug.Assert(codepoint <= MaxUnicodeChar);
+
+                if (name.EndsWith(RangeFirstSuffix, StringComparison.Ordinal))
+                {
+                    if (rangeFirst.HasValue)
+                    {
+                        throw CreateUnpairedRangeException(codepoint);
+                    }
+
+                    // Remember the start; the range is processed when its "Last" entry is seen.
+                    rangeFirst = codepoint;
+                    continue;
+                }
+
+                uint first = codepoint;
+                if (name.EndsWith(RangeLastSuffix, StringComparison.Ordinal))
+                {
+                    if (!rangeFirst.HasValue)
+                    {
+                        throw CreateUnpairedRangeException(codepoint);
+                    }
+
+                    first = rangeFirst.Value;
+                    rangeFirst = null;
+                }
+                else if (rangeFirst.HasValue)
+                {
+                    // A "First" entry must be immediately followed by its "Last" entry.
+                    throw CreateUnpairedRangeException(codepoint);
+                }
+
+                // We only allow certain categories of code points.
+                // Zs (space separators) aren't included, but we allow U+0020 SPACE as a special case
+                if (!(codepoint == (uint)' ' || IsAllowedUnicodeCategory(category)))
+                {
+                    continue;
+                }
+
+                // For an ordinary entry first == codepoint, so exactly one flag is set.
+                for (uint current = first; current <= codepoint; current++)
+                {
+                    definedChars[current] = true;
+                }
+            }
+
+            if (rangeFirst.HasValue)
+            {
+                throw CreateUnpairedRangeException(rangeFirst.Value);
+            }
+
+            // Finally, write the list of defined characters out as a bitmap.
+            // Each consecutive block of 8 chars is written as a single byte.
+            // For instance, the first byte of the output file contains the
+            // bitmap for the following codepoints:
+            // - (bit 7) U+0007 [MSB]
+            // - (bit 6) U+0006
+            // - (bit 5) U+0005
+            // - (bit 4) U+0004
+            // - (bit 3) U+0003
+            // - (bit 2) U+0002
+            // - (bit 1) U+0001
+            // - (bit 0) U+0000 [LSB]
+            // The next byte will contain the bitmap for U+000F to U+0008,
+            // and so on until the last byte, which is U+FFFF to U+FFF8.
+            byte[] bitmap = new byte[BmpCharCount / 8];
+            for (int i = 0; i < BmpCharCount; i++)
+            {
+                if (definedChars[i])
+                {
+                    // Byte index is i / 8; bit index within that byte is i % 8.
+                    bitmap[i >> 3] |= (byte)(1 << (i & 7));
+                }
+            }
+
+            File.WriteAllBytes("unicode-defined-chars.bin", bitmap);
+        }
+
+        /// <summary>
+        /// Builds the exception reported when a range marker has no partner.
+        /// </summary>
+        /// <param name="codepoint">The code point of the line where the problem was detected.</param>
+        /// <returns>The exception for the caller to throw.</returns>
+        private static InvalidDataException CreateUnpairedRangeException(uint codepoint)
+        {
+            return new InvalidDataException(
+                "UnicodeData.txt contains an unpaired range marker near U+"
+                + codepoint.ToString("X4", CultureInfo.InvariantCulture) + ".");
+        }
+
+        /// <summary>
+        /// Determines whether code points of the given general category are
+        /// included in the output bitmap.
+        /// </summary>
+        /// <param name="category">The two-letter general category from UnicodeData.txt.</param>
+        /// <returns>true if the category is allowed; otherwise false.</returns>
+        private static bool IsAllowedUnicodeCategory(string category)
+        {
+            // We only allow certain classes of characters
+            switch (category)
+            {
+                // letters
+                case "Lu":
+                case "Ll":
+                case "Lt":
+                case "Lm":
+                case "Lo":
+                // marks
+                case "Mn":
+                case "Mc":
+                case "Me":
+                // numbers
+                case "Nd":
+                case "Nl":
+                case "No":
+                // punctuation
+                case "Pc":
+                case "Pd":
+                case "Ps":
+                case "Pe":
+                case "Pi":
+                case "Pf":
+                case "Po":
+                // symbols
+                case "Sm":
+                case "Sc":
+                case "Sk":
+                case "So":
+                // other
+                case "Cf":
+                    return true;
+
+                default:
+                    return false;
+            }
+        }
+    }
+}
--- a/unicode/Generators/DefinedCharListGenerator/Properties/AssemblyInfo.cs
+++ b/unicode/Generators/DefinedCharListGenerator/Properties/AssemblyInfo.cs
@ -0,0 +1,36 @@
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+// General Information about an assembly is controlled through the following 
+// set of attributes. Change these attribute values to modify the information
+// associated with an assembly.
+[assembly: AssemblyTitle("DefinedCharListGenerator")]
+[assembly: AssemblyDescription("")]
+[assembly: AssemblyConfiguration("")]
+[assembly: AssemblyCompany("")]
+[assembly: AssemblyProduct("DefinedCharListGenerator")]
+[assembly: AssemblyCopyright("Copyright ©  2015")]
+[assembly: AssemblyTrademark("")]
+[assembly: AssemblyCulture("")]
+
+// Setting ComVisible to false makes the types in this assembly not visible 
+// to COM components.  If you need to access a type in this assembly from 
+// COM, set the ComVisible attribute to true on that type.
+[assembly: ComVisible(false)]
+
+// The following GUID is for the ID of the typelib if this project is exposed to COM
+[assembly: Guid("5089f890-38f7-413c-87b0-d8eb1e238ef5")]
+
+// Version information for an assembly consists of the following four values:
+//
+//      Major Version
+//      Minor Version 
+//      Build Number
+//      Revision
+//
+// You can specify all the values or you can default the Build and Revision Numbers 
+// by using the '*' as shown below:
+// [assembly: AssemblyVersion("1.0.*")]
+[assembly: AssemblyVersion("1.0.0.0")]
+[assembly: AssemblyFileVersion("1.0.0.0")]
--- a/unicode/Generators/Generators.sln
+++ b/unicode/Generators/Generators.sln
@ -0,0 +1,28 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio 2013
+VisualStudioVersion = 12.0.31101.0
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DefinedCharListGenerator", "DefinedCharListGenerator\DefinedCharListGenerator.csproj", "{0E87CEC9-46CE-4B6B-A613-93AA773C10A4}"
+EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "UnicodeTablesGenerator", "UnicodeTablesGenerator\UnicodeTablesGenerator.csproj", "{3D181114-6946-4D34-A3B9-0F83B6B8FEAE}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|Any CPU = Debug|Any CPU
+		Release|Any CPU = Release|Any CPU
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{0E87CEC9-46CE-4B6B-A613-93AA773C10A4}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{0E87CEC9-46CE-4B6B-A613-93AA773C10A4}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{0E87CEC9-46CE-4B6B-A613-93AA773C10A4}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{0E87CEC9-46CE-4B6B-A613-93AA773C10A4}.Release|Any CPU.Build.0 = Release|Any CPU
+		{3D181114-6946-4D34-A3B9-0F83B6B8FEAE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{3D181114-6946-4D34-A3B9-0F83B6B8FEAE}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{3D181114-6946-4D34-A3B9-0F83B6B8FEAE}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{3D181114-6946-4D34-A3B9-0F83B6B8FEAE}.Release|Any CPU.Build.0 = Release|Any CPU
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+EndGlobal
--- a/unicode/Generators/UnicodeTablesGenerator/App.config
+++ b/unicode/Generators/UnicodeTablesGenerator/App.config
@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="utf-8" ?>
+<configuration>
+    <startup> 
+        <supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.5.1" />
+    </startup>
+</configuration>
--- a/unicode/Generators/UnicodeTablesGenerator/Program.cs
+++ b/unicode/Generators/UnicodeTablesGenerator/Program.cs
@ -0,0 +1,109 @@
+using System;
+using System.Globalization;
+using System.IO;
+using System.Linq;
+using System.Text;
+using System.Text.RegularExpressions;
+
+namespace UnicodeTablesGenerator
+{
+    /// <summary>
+    /// Generates the 'UnicodeBlocks.generated.txt' and
+    /// 'UnicodeBlocksTests.generated.txt' source files from 'Blocks.txt'.
+    /// </summary>
+    /// <remarks>
+    /// The output is not ready to use as-is: it needs hand-tweaking (for example,
+    /// surrogate and private use blocks have to be removed) before it is merged
+    /// into the corresponding *.generated.cs files.
+    /// </remarks>
+    class Program
+    {
+        private const string _codePointFiltersGeneratedFormat = @"
+/// <summary>
+/// Represents the '{0}' Unicode block (U+{1}..U+{2}).
+/// </summary>
+/// <remarks>
+/// See http://www.unicode.org/charts/PDF/U{1}.pdf for the full set of characters in this block.
+/// </remarks>
+public static UnicodeBlock {3}
+{{
+    get
+    {{
+        return Volatile.Read(ref _{4}) ?? CreateBlock(ref _{4}, first: '\u{1}', last: '\u{2}');
+    }}
+}}
+private static UnicodeBlock _{4};
+";
+
+        private const string _codePointFiltersTestsGeneratedFormat = @"[InlineData('\u{1}', '\u{2}', nameof(UnicodeBlocks.{0}))]";
+
+        /// <summary>
+        /// Entry point: reads 'Blocks.txt' from the current directory and writes
+        /// both generated text files next to it.
+        /// </summary>
+        private static void Main()
+        {
+            // Input is Blocks.txt from the UCD release that matches the version of
+            // the Unicode spec being consumed.
+            // More info: http://www.unicode.org/reports/tr44/
+            // Latest Blocks.txt: http://www.unicode.org/Public/UCD/latest/ucd/Blocks.txt
+
+            // Matches "XXXX..XXXX; Block name" where both end points have exactly
+            // four hex digits; every other line is ignored.
+            Regex blockLinePattern = new Regex(@"^(?<startCode>[0-9A-F]{4})\.\.(?<endCode>[0-9A-F]{4}); (?<blockName>.+)$");
+
+            StringBuilder runtimeSource = new StringBuilder();
+            StringBuilder testSource = new StringBuilder();
+
+            foreach (string inputLine in File.ReadAllLines("Blocks.txt"))
+            {
+                Match blockLine = blockLinePattern.Match(inputLine);
+                if (blockLine.Success)
+                {
+                    GroupCollection parts = blockLine.Groups;
+                    string firstCode = parts["startCode"].Value;
+                    string lastCode = parts["endCode"].Value;
+                    string displayName = parts["blockName"].Value;
+
+                    // "Basic Latin" -> property "BasicLatin" -> backing field "basicLatin"
+                    string propertyName = RemoveAllNonAlphanumeric(displayName);
+                    string fieldName = WithDotNetFieldCasing(propertyName);
+
+                    runtimeSource.AppendFormat(
+                        CultureInfo.InvariantCulture,
+                        _codePointFiltersGeneratedFormat,
+                        displayName,
+                        firstCode,
+                        lastCode,
+                        propertyName,
+                        fieldName);
+
+                    testSource
+                        .AppendFormat(
+                            CultureInfo.InvariantCulture,
+                            _codePointFiltersTestsGeneratedFormat,
+                            propertyName,
+                            firstCode,
+                            lastCode)
+                        .AppendLine();
+                }
+            }
+
+            File.WriteAllText("UnicodeBlocks.generated.txt", runtimeSource.ToString());
+            File.WriteAllText("UnicodeBlocksTests.generated.txt", testSource.ToString());
+        }
+
+        /// <summary>
+        /// Returns <paramref name="blockName"/> with every character other than
+        /// the ASCII letters (A-Z, a-z) and ASCII digits (0-9) removed.
+        /// </summary>
+        private static string RemoveAllNonAlphanumeric(string blockName)
+        {
+            StringBuilder kept = new StringBuilder(blockName.Length);
+            foreach (char candidate in blockName)
+            {
+                bool isUpperLetter = 'A' <= candidate && candidate <= 'Z';
+                bool isLowerLetter = 'a' <= candidate && candidate <= 'z';
+                bool isDigit = '0' <= candidate && candidate <= '9';
+                if (isUpperLetter || isLowerLetter || isDigit)
+                {
+                    kept.Append(candidate);
+                }
+            }
+            return kept.ToString();
+        }
+
+        /// <summary>
+        /// Converts a property-style name to field-style casing by lowercasing its
+        /// leading run of non-lowercase characters, e.g. "BasicLatin" becomes
+        /// "basicLatin" and "CJKUnified" becomes "cjkUnified".
+        /// </summary>
+        private static string WithDotNetFieldCasing(string input)
+        {
+            char[] result = input.ToCharArray();
+
+            // Lowercase everything up to (not including) the first lowercase char.
+            int index = 0;
+            while (index < result.Length && !Char.IsLower(result[index]))
+            {
+                result[index] = Char.ToLowerInvariant(result[index]);
+                index++;
+            }
+
+            // If a lowercase char ended the run, the char just before it begins the
+            // next word, so it gets its original casing back - unless that char is
+            // the very first one of the string.
+            bool stoppedAtLowercase = index < result.Length;
+            if (stoppedAtLowercase && index > 1)
+            {
+                result[index - 1] = input[index - 1];
+            }
+
+            return new String(result);
+        }
+    }
+}
--- a/unicode/Generators/UnicodeTablesGenerator/Properties/AssemblyInfo.cs
+++ b/unicode/Generators/UnicodeTablesGenerator/Properties/AssemblyInfo.cs
@ -0,0 +1,36 @@
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+// General Information about an assembly is controlled through the following 
+// set of attributes. Change these attribute values to modify the information
+// associated with an assembly.
+[assembly: AssemblyTitle("UnicodeTablesGenerator")]
+[assembly: AssemblyDescription("")]
+[assembly: AssemblyConfiguration("")]
+[assembly: AssemblyCompany("")]
+[assembly: AssemblyProduct("UnicodeTablesGenerator")]
+[assembly: AssemblyCopyright("Copyright ©  2015")]
+[assembly: AssemblyTrademark("")]
+[assembly: AssemblyCulture("")]
+
+// Setting ComVisible to false makes the types in this assembly not visible 
+// to COM components.  If you need to access a type in this assembly from 
+// COM, set the ComVisible attribute to true on that type.
+[assembly: ComVisible(false)]
+
+// The following GUID is for the ID of the typelib if this project is exposed to COM
+[assembly: Guid("c9286457-3d25-4143-9458-028aabedc4f5")]
+
+// Version information for an assembly consists of the following four values:
+//
+//      Major Version
+//      Minor Version 
+//      Build Number
+//      Revision
+//
+// You can specify all the values or you can default the Build and Revision Numbers 
+// by using the '*' as shown below:
+// [assembly: AssemblyVersion("1.0.*")]
+[assembly: AssemblyVersion("1.0.0.0")]
+[assembly: AssemblyFileVersion("1.0.0.0")]
--- a/unicode/Generators/UnicodeTablesGenerator/UnicodeTablesGenerator.csproj
+++ b/unicode/Generators/UnicodeTablesGenerator/UnicodeTablesGenerator.csproj
@ -0,0 +1,61 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
+  <PropertyGroup>
+    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+    <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+    <ProjectGuid>{3D181114-6946-4D34-A3B9-0F83B6B8FEAE}</ProjectGuid>
+    <OutputType>Exe</OutputType>
+    <AppDesignerFolder>Properties</AppDesignerFolder>
+    <RootNamespace>UnicodeTablesGenerator</RootNamespace>
+    <AssemblyName>UnicodeTablesGenerator</AssemblyName>
+    <TargetFrameworkVersion>v4.5.1</TargetFrameworkVersion>
+    <FileAlignment>512</FileAlignment>
+    <AutoGenerateBindingRedirects>true</AutoGenerateBindingRedirects>
+  </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+    <PlatformTarget>AnyCPU</PlatformTarget>
+    <DebugSymbols>true</DebugSymbols>
+    <DebugType>full</DebugType>
+    <Optimize>false</Optimize>
+    <OutputPath>bin\Debug\</OutputPath>
+    <DefineConstants>DEBUG;TRACE</DefineConstants>
+    <ErrorReport>prompt</ErrorReport>
+    <WarningLevel>4</WarningLevel>
+  </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
+    <PlatformTarget>AnyCPU</PlatformTarget>
+    <DebugType>pdbonly</DebugType>
+    <Optimize>true</Optimize>
+    <OutputPath>bin\Release\</OutputPath>
+    <DefineConstants>TRACE</DefineConstants>
+    <ErrorReport>prompt</ErrorReport>
+    <WarningLevel>4</WarningLevel>
+  </PropertyGroup>
+  <ItemGroup>
+    <Reference Include="System" />
+    <Reference Include="System.Core" />
+    <Reference Include="Microsoft.CSharp" />
+  </ItemGroup>
+  <ItemGroup>
+    <Compile Include="Program.cs" />
+    <Compile Include="Properties\AssemblyInfo.cs" />
+  </ItemGroup>
+  <ItemGroup>
+    <None Include="App.config" />
+  </ItemGroup>
+  <ItemGroup>
+    <!-- Links Blocks.txt from two directories above this project and copies it to the
+         build output directory when it is newer. Program.cs opens the file by the bare
+         relative name "Blocks.txt". -->
+    <Content Include="..\..\Blocks.txt">
+      <Link>Blocks.txt</Link>
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </Content>
+  </ItemGroup>
+  <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
+  <!-- To modify your build process, add your task inside one of the targets below and uncomment it. 
+       Other similar extension points exist, see Microsoft.Common.targets.
+  <Target Name="BeforeBuild">
+  </Target>
+  <Target Name="AfterBuild">
+  </Target>
+  -->
+</Project>
--- a/unicode/UnicodeData.txt
+++ b/unicode/UnicodeData.txt
--- a/unicode/unicode-copyright.txt
+++ b/unicode/unicode-copyright.txt
@ -0,0 +1,47 @@
+The files Blocks.txt and UnicodeData.txt in this directory were
+retrieved from the following URLs on Saturday, February 7, 2015.
+
+http://www.unicode.org/Public/7.0.0/ucd/Blocks.txt
+http://www.unicode.org/Public/7.0.0/ucd/UnicodeData.txt
+
+The below copyright notice applies to these files.
+
+========================================================================
+
+COPYRIGHT AND PERMISSION NOTICE
+
+Copyright © 1991-2015 Unicode, Inc. All rights reserved.
+Distributed under the Terms of Use in 
+http://www.unicode.org/copyright.html.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of the Unicode data files and any associated documentation
+(the "Data Files") or Unicode software and any associated documentation
+(the "Software") to deal in the Data Files or Software
+without restriction, including without limitation the rights to use,
+copy, modify, merge, publish, distribute, and/or sell copies of
+the Data Files or Software, and to permit persons to whom the Data Files
+or Software are furnished to do so, provided that
+(a) this copyright and permission notice appear with all copies 
+of the Data Files or Software,
+(b) this copyright and permission notice appear in associated 
+documentation, and
+(c) there is clear notice in each modified Data File or in the Software
+as well as in the documentation associated with the Data File(s) or
+Software that the data or software has been modified.
+
+THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
+ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT OF THIRD PARTY RIGHTS.
+IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
+NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
+DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+PERFORMANCE OF THE DATA FILES OR SOFTWARE.
+
+Except as contained in this notice, the name of a copyright holder
+shall not be used in advertising or otherwise to promote the sale,
+use or other dealings in these Data Files or Software without prior
+written authorization of the copyright holder.