270 lines
9.1 KiB
C#
270 lines
9.1 KiB
C#
// Copyright (c) .NET Foundation. All rights reserved.
|
|
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
|
|
|
using System;
|
|
using System.Globalization;
|
|
using System.IO;
|
|
using Xunit;
|
|
|
|
namespace Microsoft.Extensions.WebEncoders
|
|
{
|
|
public class HtmlEncoderTests
|
|
{
|
|
[Fact]
|
|
public void Ctor_WithCodePointFilter()
|
|
{
|
|
// Arrange
|
|
var filter = new CodePointFilter().AllowChars("ab").AllowChars('\0', '&', '\uFFFF', 'd');
|
|
HtmlEncoder encoder = new HtmlEncoder(filter);
|
|
|
|
// Act & assert
|
|
Assert.Equal("a", encoder.HtmlEncode("a"));
|
|
Assert.Equal("b", encoder.HtmlEncode("b"));
|
|
Assert.Equal("c", encoder.HtmlEncode("c"));
|
|
Assert.Equal("d", encoder.HtmlEncode("d"));
|
|
Assert.Equal("�", encoder.HtmlEncode("\0")); // we still always encode control chars
|
|
Assert.Equal("&", encoder.HtmlEncode("&")); // we still always encode HTML-special chars
|
|
Assert.Equal("", encoder.HtmlEncode("\uFFFF")); // we still always encode non-chars and other forbidden chars
|
|
}
|
|
|
|
[Fact]
|
|
public void Ctor_WithUnicodeRanges()
|
|
{
|
|
// Arrange
|
|
HtmlEncoder encoder = new HtmlEncoder(UnicodeRanges.Latin1Supplement, UnicodeRanges.MiscellaneousSymbols);
|
|
|
|
// Act & assert
|
|
Assert.Equal("a", encoder.HtmlEncode("a"));
|
|
Assert.Equal("\u00E9", encoder.HtmlEncode("\u00E9" /* LATIN SMALL LETTER E WITH ACUTE */));
|
|
Assert.Equal("\u2601", encoder.HtmlEncode("\u2601" /* CLOUD */));
|
|
}
|
|
|
|
[Fact]
|
|
public void Ctor_WithNoParameters_DefaultsToBasicLatin()
|
|
{
|
|
// Arrange
|
|
HtmlEncoder encoder = new HtmlEncoder();
|
|
|
|
// Act & assert
|
|
Assert.Equal("a", encoder.HtmlEncode("a"));
|
|
Assert.Equal("é", encoder.HtmlEncode("\u00E9" /* LATIN SMALL LETTER E WITH ACUTE */));
|
|
Assert.Equal("☁", encoder.HtmlEncode("\u2601" /* CLOUD */));
|
|
}
|
|
|
|
[Fact]
|
|
public void Default_EquivalentToBasicLatin()
|
|
{
|
|
// Arrange
|
|
HtmlEncoder controlEncoder = new HtmlEncoder(UnicodeRanges.BasicLatin);
|
|
HtmlEncoder testEncoder = HtmlEncoder.Default;
|
|
|
|
// Act & assert
|
|
for (int i = 0; i <= Char.MaxValue; i++)
|
|
{
|
|
if (!IsSurrogateCodePoint(i))
|
|
{
|
|
string input = new String((char)i, 1);
|
|
Assert.Equal(controlEncoder.HtmlEncode(input), testEncoder.HtmlEncode(input));
|
|
}
|
|
}
|
|
}
|
|
|
|
[Fact]
|
|
public void Default_ReturnsSingletonInstance()
|
|
{
|
|
// Act
|
|
HtmlEncoder encoder1 = HtmlEncoder.Default;
|
|
HtmlEncoder encoder2 = HtmlEncoder.Default;
|
|
|
|
// Assert
|
|
Assert.Same(encoder1, encoder2);
|
|
}
|
|
|
|
[Theory]
|
|
[InlineData("<", "<")]
|
|
[InlineData(">", ">")]
|
|
[InlineData("&", "&")]
|
|
[InlineData("'", "'")]
|
|
[InlineData("\"", """)]
|
|
[InlineData("+", "+")]
|
|
public void HtmlEncode_AllRangesAllowed_StillEncodesForbiddenChars_Simple(string input, string expected)
|
|
{
|
|
// Arrange
|
|
HtmlEncoder encoder = new HtmlEncoder(UnicodeRanges.All);
|
|
|
|
// Act
|
|
string retVal = encoder.HtmlEncode(input);
|
|
|
|
// Assert
|
|
Assert.Equal(expected, retVal);
|
|
}
|
|
|
|
[Fact]
|
|
public void HtmlEncode_AllRangesAllowed_StillEncodesForbiddenChars_Extended()
|
|
{
|
|
// Arrange
|
|
HtmlEncoder encoder = new HtmlEncoder(UnicodeRanges.All);
|
|
|
|
// Act & assert - BMP chars
|
|
for (int i = 0; i <= 0xFFFF; i++)
|
|
{
|
|
string input = new String((char)i, 1);
|
|
string expected;
|
|
if (IsSurrogateCodePoint(i))
|
|
{
|
|
expected = "\uFFFD"; // unpaired surrogate -> Unicode replacement char
|
|
}
|
|
else
|
|
{
|
|
if (input == "<") { expected = "<"; }
|
|
else if (input == ">") { expected = ">"; }
|
|
else if (input == "&") { expected = "&"; }
|
|
else if (input == "\"") { expected = """; }
|
|
else
|
|
{
|
|
bool mustEncode = false;
|
|
if (i == '\'' || i == '+')
|
|
{
|
|
mustEncode = true; // apostrophe, plus
|
|
}
|
|
else if (i <= 0x001F || (0x007F <= i && i <= 0x9F))
|
|
{
|
|
mustEncode = true; // control char
|
|
}
|
|
else if (!UnicodeHelpers.IsCharacterDefined((char)i))
|
|
{
|
|
mustEncode = true; // undefined (or otherwise disallowed) char
|
|
}
|
|
|
|
if (mustEncode)
|
|
{
|
|
expected = String.Format(CultureInfo.InvariantCulture, "&#x{0:X};", i);
|
|
}
|
|
else
|
|
{
|
|
expected = input; // no encoding
|
|
}
|
|
}
|
|
}
|
|
|
|
string retVal = encoder.HtmlEncode(input);
|
|
Assert.Equal(expected, retVal);
|
|
}
|
|
|
|
// Act & assert - astral chars
|
|
for (int i = 0x10000; i <= 0x10FFFF; i++)
|
|
{
|
|
string input = Char.ConvertFromUtf32(i);
|
|
string expected = String.Format(CultureInfo.InvariantCulture, "&#x{0:X};", i);
|
|
string retVal = encoder.HtmlEncode(input);
|
|
Assert.Equal(expected, retVal);
|
|
}
|
|
}
|
|
|
|
[Fact]
|
|
public void HtmlEncode_BadSurrogates_ReturnsUnicodeReplacementChar()
|
|
{
|
|
// Arrange
|
|
HtmlEncoder encoder = new HtmlEncoder(UnicodeRanges.All); // allow all codepoints
|
|
|
|
// "a<unpaired leading>b<unpaired trailing>c<trailing before leading>d<unpaired trailing><valid>e<high at end of string>"
|
|
const string input = "a\uD800b\uDFFFc\uDFFF\uD800d\uDFFF\uD800\uDFFFe\uD800";
|
|
const string expected = "a\uFFFDb\uFFFDc\uFFFD\uFFFDd\uFFFD𐏿e\uFFFD";
|
|
|
|
// Act
|
|
string retVal = encoder.HtmlEncode(input);
|
|
|
|
// Assert
|
|
Assert.Equal(expected, retVal);
|
|
}
|
|
|
|
[Fact]
|
|
public void HtmlEncode_EmptyStringInput_ReturnsEmptyString()
|
|
{
|
|
// Arrange
|
|
HtmlEncoder encoder = new HtmlEncoder();
|
|
|
|
// Act & assert
|
|
Assert.Equal("", encoder.HtmlEncode(""));
|
|
}
|
|
|
|
[Fact]
|
|
public void HtmlEncode_InputDoesNotRequireEncoding_ReturnsOriginalStringInstance()
|
|
{
|
|
// Arrange
|
|
HtmlEncoder encoder = new HtmlEncoder();
|
|
string input = "Hello, there!";
|
|
|
|
// Act & assert
|
|
Assert.Same(input, encoder.HtmlEncode(input));
|
|
}
|
|
|
|
[Fact]
|
|
public void HtmlEncode_NullInput_ReturnsNull()
|
|
{
|
|
// Arrange
|
|
HtmlEncoder encoder = new HtmlEncoder();
|
|
|
|
// Act & assert
|
|
Assert.Null(encoder.HtmlEncode(null));
|
|
}
|
|
|
|
[Fact]
|
|
public void HtmlEncode_WithCharsRequiringEncodingAtBeginning()
|
|
{
|
|
Assert.Equal("&Hello, there!", new HtmlEncoder().HtmlEncode("&Hello, there!"));
|
|
}
|
|
|
|
[Fact]
|
|
public void HtmlEncode_WithCharsRequiringEncodingAtEnd()
|
|
{
|
|
Assert.Equal("Hello, there!&", new HtmlEncoder().HtmlEncode("Hello, there!&"));
|
|
}
|
|
|
|
[Fact]
|
|
public void HtmlEncode_WithCharsRequiringEncodingInMiddle()
|
|
{
|
|
Assert.Equal("Hello, &there!", new HtmlEncoder().HtmlEncode("Hello, &there!"));
|
|
}
|
|
|
|
[Fact]
|
|
public void HtmlEncode_WithCharsRequiringEncodingInterspersed()
|
|
{
|
|
Assert.Equal("Hello, <there>!", new HtmlEncoder().HtmlEncode("Hello, <there>!"));
|
|
}
|
|
|
|
[Fact]
|
|
public void HtmlEncode_CharArray()
|
|
{
|
|
// Arrange
|
|
HtmlEncoder encoder = new HtmlEncoder();
|
|
var output = new StringWriter();
|
|
|
|
// Act
|
|
encoder.HtmlEncode("Hello+world!".ToCharArray(), 3, 5, output);
|
|
|
|
// Assert
|
|
Assert.Equal("lo+wo", output.ToString());
|
|
}
|
|
|
|
[Fact]
|
|
public void HtmlEncode_StringSubstring()
|
|
{
|
|
// Arrange
|
|
HtmlEncoder encoder = new HtmlEncoder();
|
|
var output = new StringWriter();
|
|
|
|
// Act
|
|
encoder.HtmlEncode("Hello+world!", 3, 5, output);
|
|
|
|
// Assert
|
|
Assert.Equal("lo+wo", output.ToString());
|
|
}
|
|
|
|
private static bool IsSurrogateCodePoint(int codePoint)
|
|
{
|
|
return (0xD800 <= codePoint && codePoint <= 0xDFFF);
|
|
}
|
|
}
|
|
}
|