Change HTML splitting logic to not split on surrogate pairs.

- When a surrogate pair is about to be split down the middle, we reduce the size of our split by 1 character so that the split lands right before the surrogate pair. If the split instead lands on a zero-width joiner, we still render valid bytes because a zero-width joiner can stand alone.
- Added tests for the various split cases.

#2470
This commit is contained in:
N. Taylor Mullen 2018-07-13 12:36:18 -07:00
parent accea6edbd
commit cfd63e1e2e
2 changed files with 128 additions and 15 deletions

View File

@ -325,28 +325,56 @@ namespace Microsoft.AspNetCore.Razor.Language.CodeGeneration
var content = builder.ToString();
var charactersConsumed = 0;
WriteHtmlLiteral(context, MaxStringLiteralLength, content);
}
// Render the string in pieces to avoid Roslyn OOM exceptions at compile time: https://github.com/aspnet/External/issues/54
while (charactersConsumed < content.Length)
// Internal for testing
/// <summary>
/// Writes <paramref name="literal"/> to the code writer as one or more invocations of
/// <c>WriteHtmlContentMethod</c>, each taking a piece of the text as its string-literal argument.
/// </summary>
/// <remarks>
/// Text no longer than <paramref name="maxStringLiteralLength"/> is written as a single literal.
/// Longer text is written in consecutive pieces. A piece never ends on a high surrogate while more
/// text follows, so a surrogate pair (for example an emoji) is not cut in half.
/// </remarks>
/// <param name="context">The rendering context whose <c>CodeWriter</c> receives the output.</param>
/// <param name="maxStringLiteralLength">The target maximum number of UTF-16 code units per piece.</param>
/// <param name="literal">The text to write.</param>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="literal"/> has to be split and <paramref name="maxStringLiteralLength"/> is not positive.
/// </exception>
internal void WriteHtmlLiteral(CodeRenderingContext context, int maxStringLiteralLength, string literal)
{
    if (literal.Length <= maxStringLiteralLength)
    {
        WriteLiteral(literal);
        return;
    }

    if (maxStringLiteralLength <= 0)
    {
        // Splitting into pieces of zero (or negative) length can never make progress; fail with a clear
        // message instead of indexing before the start of the string.
        throw new ArgumentOutOfRangeException(
            nameof(maxStringLiteralLength),
            "The maximum string literal length must be greater than zero.");
    }

    // String is too large, render the string in pieces to avoid Roslyn OOM exceptions at compile time: https://github.com/aspnet/External/issues/54
    var charactersConsumed = 0;
    do
    {
        var charactersRemaining = literal.Length - charactersConsumed;
        var charactersToSubstring = Math.Min(maxStringLiteralLength, charactersRemaining);
        var lastCharBeforeSplit = literal[charactersConsumed + charactersToSubstring - 1];

        if (char.IsHighSurrogate(lastCharBeforeSplit))
        {
            if (charactersRemaining <= 1)
            {
                // The user has an invalid file with a partial surrogate at the splitting point.
                // We'll let the invalid character flow but we'll explode later on.
            }
            else if (charactersToSubstring > 1)
            {
                // Take one less character this iteration. We're attempting to split in between a surrogate pair.
                // This can happen when something like an emoji sits on the barrier between splits; if we were to
                // split the emoji we'd end up with invalid bytes in our output.
                charactersToSubstring--;
            }
            else
            {
                // The piece consists of the high surrogate alone (maximum length of 1). Shrinking it would
                // produce an empty piece and the loop would never advance, so take the following character
                // as well and keep the pair together, exceeding the maximum by one character.
                charactersToSubstring++;
            }
        }

        WriteLiteral(literal.Substring(charactersConsumed, charactersToSubstring));
        charactersConsumed += charactersToSubstring;
    } while (charactersConsumed < literal.Length);

    // Emits a single invocation of the HTML content method with the given text as a string literal.
    void WriteLiteral(string content)
    {
        context.CodeWriter
            .WriteStartMethodInvocation(WriteHtmlContentMethod)
            .WriteStringLiteral(content)
            .WriteEndMethodInvocation();
    }
}

View File

@ -339,6 +339,91 @@ if (true) { }
ignoreLineEndingDifferences: true);
}
[Fact]
public void WriteHtmlLiteral_WithinMaxSize_WritesSingleLiteral()
{
    // Verifies that text shorter than the limit is emitted as exactly one literal.

    // Arrange
    var writer = new RuntimeNodeWriter();
    var context = TestCodeRenderingContext.CreateRuntime();

    // Act
    // "Hello" is 5 characters, which is within the limit of 6.
    writer.WriteHtmlLiteral(context, maxStringLiteralLength: 6, "Hello");

    // Assert
    var csharp = context.CodeWriter.GenerateCode();
    Assert.Equal(
@"WriteLiteral(""Hello"");
",
        csharp,
        ignoreLineEndingDifferences: true);
}
[Fact]
public void WriteHtmlLiteral_GreaterThanMaxSize_WritesMultipleLiterals()
{
    // Verifies that text longer than the limit is emitted as consecutive literals of at most that length.

    // Arrange
    var writer = new RuntimeNodeWriter();
    var context = TestCodeRenderingContext.CreateRuntime();

    // Act
    // "Hello World" is 11 characters; with a limit of 6 it splits into "Hello " and "World".
    writer.WriteHtmlLiteral(context, maxStringLiteralLength: 6, "Hello World");

    // Assert
    var csharp = context.CodeWriter.GenerateCode();
    Assert.Equal(
@"WriteLiteral(""Hello "");
WriteLiteral(""World"");
",
        csharp,
        ignoreLineEndingDifferences: true);
}
[Fact]
public void WriteHtmlLiteral_GreaterThanMaxSize_SingleEmojisSplit()
{
    // Verifies that a split which would land in the middle of a surrogate pair is moved to just before it.

    // Arrange
    var writer = new RuntimeNodeWriter();
    var context = TestCodeRenderingContext.CreateRuntime();

    // Act
    // The input is a space followed by one emoji, i.e. three UTF-16 code units. A limit of 2 would put
    // the boundary between the emoji's two surrogates, so the first piece is expected to be the space alone.
    writer.WriteHtmlLiteral(context, maxStringLiteralLength: 2, " 👦");

    // Assert
    var csharp = context.CodeWriter.GenerateCode();
    Assert.Equal(
@"WriteLiteral("" "");
WriteLiteral(""👦"");
",
        csharp,
        ignoreLineEndingDifferences: true);
}
[Fact]
public void WriteHtmlLiteral_GreaterThanMaxSize_SequencedZeroWithJoinedEmojisSplit()
{
    // Verifies splitting of emoji sequences glued together with zero-width joiners (U+200D): surrogate
    // pairs must stay intact, while a piece is allowed to end on a joiner.

    // Arrange
    // The joiner is invisible, so it is written as an escape to keep the exact input and expected
    // output reviewable and safe from tools that silently drop it.
    var joiner = "\u200D";
    var womanWomanJoiner = "👩" + joiner + "👩" + joiner; // 2 + 1 + 2 + 1 = 6 UTF-16 code units
    var girlGirl = "👧" + joiner + "👧";                   // 2 + 1 + 2 = 5 UTF-16 code units
    var family = womanWomanJoiner + girlGirl;              // woman, joiner, woman, joiner, girl, joiner, girl
    var writer = new RuntimeNodeWriter();
    var context = TestCodeRenderingContext.CreateRuntime();

    // Act
    writer.WriteHtmlLiteral(context, maxStringLiteralLength: 6, family + family);

    // Assert
    // Piece 1 fills the limit exactly and ends on a joiner. Piece 2 would end on the high surrogate of
    // the next emoji, so it is shortened to 5 code units. The second family then splits the same way.
    var expected =
        "WriteLiteral(\"" + womanWomanJoiner + "\");\n" +
        "WriteLiteral(\"" + girlGirl + "\");\n" +
        "WriteLiteral(\"" + womanWomanJoiner + "\");\n" +
        "WriteLiteral(\"" + girlGirl + "\");\n";
    var csharp = context.CodeWriter.GenerateCode();
    Assert.Equal(
        expected,
        csharp,
        ignoreLineEndingDifferences: true);
}
[Fact]
public void WriteHtmlContent_RendersContentCorrectly()
{