// Copyright (c) Microsoft Open Technologies, Inc. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
using System;
using System.Diagnostics;
using System.IO;
using System.Threading;
namespace Microsoft.AspNet.WebUtilities.Encoders
{
/// <summary>
/// Performs JavaScript string escaping, given an allow list of characters which
/// may be emitted unescaped.
/// </summary>
/// <remarks>
/// Once constructed, instances of this class are thread-safe for multiple callers.
/// </remarks>
public sealed class JavaScriptStringEncoder : IJavaScriptStringEncoder
{
    // Lazily created default encoder (Basic Latin allow list); see the Default property.
    private static JavaScriptStringEncoder _defaultEncoder;

    // All public encode methods forward to this inner encoder, which does the real work.
    private readonly JavaScriptStringUnicodeEncoder _innerUnicodeEncoder;

    /// <summary>
    /// Instantiates an encoder using the 'Basic Latin' code table as the allow list.
    /// </summary>
    public JavaScriptStringEncoder()
        : this(JavaScriptStringUnicodeEncoder.BasicLatin)
    {
    }

    /// <summary>
    /// Instantiates an encoder using a custom allow list of characters.
    /// </summary>
    /// <param name="filters">
    /// The code point filters describing the allow list. They are handed unchanged to a new
    /// inner encoder, which additionally forbids '\' and '/'.
    /// </param>
    public JavaScriptStringEncoder(params ICodePointFilter[] filters)
        : this(new JavaScriptStringUnicodeEncoder(filters))
    {
    }

    // Shared constructor target: wraps an already-built inner encoder.
    private JavaScriptStringEncoder(JavaScriptStringUnicodeEncoder innerEncoder)
    {
        Debug.Assert(innerEncoder != null);
        _innerUnicodeEncoder = innerEncoder;
    }

    /// <summary>
    /// A default instance of the JavaScriptStringEncoder, equivalent to allowing only
    /// the 'Basic Latin' character range.
    /// </summary>
    /// <remarks>
    /// The instance is created on first access and cached. No lock is taken, so callers
    /// racing on the first access may each construct (and receive) their own instance;
    /// whichever write lands last is the one that stays cached.
    /// </remarks>
    public static JavaScriptStringEncoder Default
    {
        get
        {
            JavaScriptStringEncoder cached = Volatile.Read(ref _defaultEncoder);
            if (cached != null)
            {
                return cached;
            }

            JavaScriptStringEncoder created = new JavaScriptStringEncoder();
            Volatile.Write(ref _defaultEncoder, created);
            return created;
        }
    }

    /// <summary>
    /// JavaScript-string-encodes characters from a char array and writes the result to
    /// <paramref name="output"/>. Forwards directly to the inner encoder.
    /// </summary>
    /// <param name="value">The source characters.</param>
    /// <param name="startIndex">Passed through to the inner encoder; presumably the index of the first character to encode.</param>
    /// <param name="charCount">Passed through to the inner encoder; presumably the number of characters to encode.</param>
    /// <param name="output">The writer handed to the inner encoder to receive the encoded text.</param>
    public void JavaScriptStringEncode(char[] value, int startIndex, int charCount, TextWriter output)
    {
        _innerUnicodeEncoder.Encode(value, startIndex, charCount, output);
    }

    /// <summary>
    /// JavaScript-string-encodes <paramref name="value"/> and returns the result.
    /// Forwards directly to the inner encoder.
    /// </summary>
    /// <param name="value">The string to encode.</param>
    /// <returns>Whatever the inner encoder returns for <paramref name="value"/>.</returns>
    public string JavaScriptStringEncode(string value)
    {
        return _innerUnicodeEncoder.Encode(value);
    }

    /// <summary>
    /// JavaScript-string-encodes characters from a string and writes the result to
    /// <paramref name="output"/>. Forwards directly to the inner encoder.
    /// </summary>
    /// <param name="value">The source string.</param>
    /// <param name="startIndex">Passed through to the inner encoder; presumably the index of the first character to encode.</param>
    /// <param name="charCount">Passed through to the inner encoder; presumably the number of characters to encode.</param>
    /// <param name="output">The writer handed to the inner encoder to receive the encoded text.</param>
    public void JavaScriptStringEncode(string value, int startIndex, int charCount, TextWriter output)
    {
        _innerUnicodeEncoder.Encode(value, startIndex, charCount, output);
    }

    // The UnicodeEncoderBase specialization that knows how to emit JavaScript escapes.
    private sealed class JavaScriptStringUnicodeEncoder : UnicodeEncoderBase
    {
        // Lazily created encoder for the Basic Latin allow list; see the BasicLatin property.
        private static JavaScriptStringUnicodeEncoder _basicLatinSingleton;

        // Worst case is 6 output chars for one input char: [input] U+FFFF -> [output] "\uFFFF".
        // Astral code points need no special accounting because they are written as two
        // escaped surrogates, i.e. still 6 output chars per input char.
        private const int MaxOutputCharsPerInputChar = 6;

        internal JavaScriptStringUnicodeEncoder(ICodePointFilter[] filters)
            : base(filters, MaxOutputCharsPerInputChar)
        {
            // Beyond what the base encoder already covers, the only characters of
            // interest here are the reverse solidus and the solidus.
            ForbidCharacter('\\');
            ForbidCharacter('/');
        }

        // Cached Basic Latin encoder, created on first access. No lock is taken, so racing
        // first callers may each build their own instance; the last write stays cached.
        internal static JavaScriptStringUnicodeEncoder BasicLatin
        {
            get
            {
                JavaScriptStringUnicodeEncoder cached = Volatile.Read(ref _basicLatinSingleton);
                if (cached != null)
                {
                    return cached;
                }

                JavaScriptStringUnicodeEncoder created = new JavaScriptStringUnicodeEncoder(new[] { CodePointFilters.BasicLatin });
                Volatile.Write(ref _basicLatinSingleton, created);
                return created;
            }
        }

        // Writes a scalar value as a JavaScript-escaped character (or sequence of characters).
        // See ECMA-262, Sec. 7.8.4, and ECMA-404, Sec. 9
        // http://www.ecma-international.org/ecma-262/5.1/#sec-7.8.4
        // http://www.ecma-international.org/publications/files/ECMA-ST/ECMA-404.pdf
        protected override void WriteEncodedScalar(ref Writer writer, uint value)
        {
            // Notes on the short escapes chosen below:
            //  * U+000B has no case here: ECMA-262 permits "\v" but ECMA-404 does not,
            //    so it falls through to the \uXXXX form.
            //  * U+002F SOLIDUS may be written "\/" under both ECMA-262 (where it is a
            //    NonEscape character) and ECMA-404.
            //  * HTML-sensitive characters (apostrophe, quotes, ...) have no short form
            //    here and are written as \uXXXX for defense-in-depth.
            //    See UnicodeEncoderBase ctor comments for more info.
            switch (value)
            {
                case (uint)'\b':
                    writer.Write(@"\b");
                    break;
                case (uint)'\t':
                    writer.Write(@"\t");
                    break;
                case (uint)'\n':
                    writer.Write(@"\n");
                    break;
                case (uint)'\f':
                    writer.Write(@"\f");
                    break;
                case (uint)'\r':
                    writer.Write(@"\r");
                    break;
                case (uint)'/':
                    writer.Write(@"\/");
                    break;
                case (uint)'\\':
                    writer.Write(@"\\");
                    break;
                default:
                    WriteEncodedScalarAsNumericEntity(ref writer, value);
                    break;
            }
        }

        // Writes a scalar value in \uXXXX form: one escape for a BMP value, or two
        // escapes (leading then trailing surrogate) for a supplementary code point.
        private static void WriteEncodedScalarAsNumericEntity(ref Writer writer, uint value)
        {
            int codePoint = (int)value;
            if (!UnicodeHelpers.IsSupplementaryCodePoint(codePoint))
            {
                // A single UTF-16 code unit is enough.
                WriteEncodedSingleCharacter(ref writer, value);
                return;
            }

            // Convert back to UTF-16 and escape each surrogate separately.
            char highSurrogate, lowSurrogate;
            UnicodeHelpers.GetUtf16SurrogatePairFromAstralScalarValue(codePoint, out highSurrogate, out lowSurrogate);
            WriteEncodedSingleCharacter(ref writer, highSurrogate);
            WriteEncodedSingleCharacter(ref writer, lowSurrogate);
        }

        // Writes one BMP value as the 6-char escape "\uXXXX", most significant nibble first.
        private static void WriteEncodedSingleCharacter(ref Writer writer, uint value)
        {
            Debug.Assert(!UnicodeHelpers.IsSupplementaryCodePoint((int)value), "The incoming value should've been in the BMP.");

            // The top nibble needs no mask: a BMP value has nothing above bit 15.
            uint nibble3 = value >> 12;
            uint nibble2 = (value >> 8) & 0xFU;
            uint nibble1 = (value >> 4) & 0xFU;
            uint nibble0 = value & 0xFU;

            writer.Write('\\');
            writer.Write('u');
            writer.Write(HexUtil.IntToChar(nibble3));
            writer.Write(HexUtil.IntToChar(nibble2));
            writer.Write(HexUtil.IntToChar(nibble1));
            writer.Write(HexUtil.IntToChar(nibble0));
        }
    }
}
}