Change HTML splitting logic to not split on surrogate pairs.
- When a surrogate pair is about to be split down the middle, we reduce the size of our split by 1 character. This way we split right before the surrogate pair. In the case of zero-width joiners, if we split on a zero-width joiner we still render valid bytes because a zero-width joiner can stand alone.
- Added tests for the various split cases.

#2470
This commit is contained in:
parent
accea6edbd
commit
cfd63e1e2e
|
|
@ -325,28 +325,56 @@ namespace Microsoft.AspNetCore.Razor.Language.CodeGeneration
|
|||
|
||||
var content = builder.ToString();
|
||||
|
||||
var charactersConsumed = 0;
|
||||
WriteHtmlLiteral(context, MaxStringLiteralLength, content);
|
||||
}
|
||||
|
||||
// Render the string in pieces to avoid Roslyn OOM exceptions at compile time: https://github.com/aspnet/External/issues/54
|
||||
while (charactersConsumed < content.Length)
|
||||
// Internal for testing
|
||||
internal void WriteHtmlLiteral(CodeRenderingContext context, int maxStringLiteralLength, string literal)
|
||||
{
|
||||
if (literal.Length <= maxStringLiteralLength)
|
||||
{
|
||||
string textToRender;
|
||||
if (content.Length <= MaxStringLiteralLength)
|
||||
WriteLiteral(literal);
|
||||
return;
|
||||
}
|
||||
|
||||
// String is too large, render the string in pieces to avoid Roslyn OOM exceptions at compile time: https://github.com/aspnet/External/issues/54
|
||||
var charactersConsumed = 0;
|
||||
do
|
||||
{
|
||||
var charactersRemaining = literal.Length - charactersConsumed;
|
||||
var charactersToSubstring = Math.Min(maxStringLiteralLength, charactersRemaining);
|
||||
var lastCharBeforeSplitIndex = charactersConsumed + charactersToSubstring - 1;
|
||||
var lastCharBeforeSplit = literal[lastCharBeforeSplitIndex];
|
||||
|
||||
if (char.IsHighSurrogate(lastCharBeforeSplit))
|
||||
{
|
||||
textToRender = content;
|
||||
}
|
||||
else
|
||||
{
|
||||
var charactersToSubstring = Math.Min(MaxStringLiteralLength, content.Length - charactersConsumed);
|
||||
textToRender = content.Substring(charactersConsumed, charactersToSubstring);
|
||||
if (charactersRemaining > 1)
|
||||
{
|
||||
// Take one less character this iteration. We're attempting to split in between a surrogate pair.
|
||||
// This can happen when something like an emoji sits on the barrier between splits; if we were to
|
||||
// split the emoji we'd end up with invalid bytes in our output.
|
||||
charactersToSubstring--;
|
||||
}
|
||||
else
|
||||
{
|
||||
// The user has an invalid file with a partial surrogate at the splitting point.
|
||||
// We'll let the invalid character flow but we'll explode later on.
|
||||
}
|
||||
}
|
||||
|
||||
context.CodeWriter
|
||||
.WriteStartMethodInvocation(WriteHtmlContentMethod)
|
||||
.WriteStringLiteral(textToRender)
|
||||
.WriteEndMethodInvocation();
|
||||
var textToRender = literal.Substring(charactersConsumed, charactersToSubstring);
|
||||
|
||||
WriteLiteral(textToRender);
|
||||
|
||||
charactersConsumed += textToRender.Length;
|
||||
} while (charactersConsumed < literal.Length);
|
||||
|
||||
void WriteLiteral(string content)
|
||||
{
|
||||
context.CodeWriter
|
||||
.WriteStartMethodInvocation(WriteHtmlContentMethod)
|
||||
.WriteStringLiteral(content)
|
||||
.WriteEndMethodInvocation();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -339,6 +339,91 @@ if (true) { }
|
|||
ignoreLineEndingDifferences: true);
|
||||
}
|
||||
|
||||
// A literal whose length does not exceed the maximum must be emitted as one WriteLiteral call.
[Fact]
public void WriteHtmlLiteral_WithinMaxSize_WritesSingleLiteral()
{
    // Arrange
    // The generated code is read back from context.CodeWriter, so no separate CodeWriter is needed.
    var writer = new RuntimeNodeWriter();
    var context = TestCodeRenderingContext.CreateRuntime();

    // Act
    // "Hello" is 5 chars, which is within the limit of 6.
    writer.WriteHtmlLiteral(context, maxStringLiteralLength: 6, "Hello");

    // Assert
    var csharp = context.CodeWriter.GenerateCode();
    Assert.Equal(
@"WriteLiteral(""Hello"");
",
        csharp,
        ignoreLineEndingDifferences: true);
}
|
||||
|
||||
// A literal longer than the maximum must be emitted as consecutive WriteLiteral calls,
// each carrying at most maxStringLiteralLength characters.
[Fact]
public void WriteHtmlLiteral_GreaterThanMaxSize_WritesMultipleLiterals()
{
    // Arrange
    // The generated code is read back from context.CodeWriter, so no separate CodeWriter is needed.
    var writer = new RuntimeNodeWriter();
    var context = TestCodeRenderingContext.CreateRuntime();

    // Act
    // "Hello World" is 11 chars; with a limit of 6 it splits into "Hello " (6) and "World" (5).
    writer.WriteHtmlLiteral(context, maxStringLiteralLength: 6, "Hello World");

    // Assert
    var csharp = context.CodeWriter.GenerateCode();
    Assert.Equal(
@"WriteLiteral(""Hello "");
WriteLiteral(""World"");
",
        csharp,
        ignoreLineEndingDifferences: true);
}
|
||||
|
||||
// When the split point would fall between the two halves of a surrogate pair, the chunk must be
// shortened by one char so the pair stays together in the next literal.
[Fact]
public void WriteHtmlLiteral_GreaterThanMaxSize_SingleEmojisSplit()
{
    // Arrange
    // The generated code is read back from context.CodeWriter, so no separate CodeWriter is needed.
    var writer = new RuntimeNodeWriter();
    var context = TestCodeRenderingContext.CreateRuntime();

    // Act
    // The input is one space followed by a single emoji (a surrogate pair): 3 UTF-16 chars.
    // A naive 2-char split would end on the emoji's high surrogate.
    writer.WriteHtmlLiteral(context, maxStringLiteralLength: 2, " 👦");

    // Assert
    var csharp = context.CodeWriter.GenerateCode();
    Assert.Equal(
@"WriteLiteral("" "");
WriteLiteral(""👦"");
",
        csharp,
        ignoreLineEndingDifferences: true);
}
|
||||
|
||||
// Emoji sequences glued together with zero-width joiners (U+200D) may be split at a joiner,
// but never between the two halves of a surrogate pair.
[Fact]
public void WriteHtmlLiteral_GreaterThanMaxSize_SequencedZeroWithJoinedEmojisSplit()
{
    // Arrange
    // The generated code is read back from context.CodeWriter, so no separate CodeWriter is needed.
    var writer = new RuntimeNodeWriter();
    var context = TestCodeRenderingContext.CreateRuntime();

    // The joiner is invisible in most editors and is easily stripped by tooling, so it is spelled
    // out as an escape rather than embedded raw in the literals below.
    const string Zwj = "\u200D";

    // One "family" sequence: 4 emoji (2 chars each) + 3 joiners = 11 UTF-16 chars.
    var family = $"👩{Zwj}👩{Zwj}👧{Zwj}👧";

    // Act
    // 22 chars with a limit of 6 are expected to split as 6 / 5 / 6 / 5:
    // the 2nd chunk would otherwise end on the high surrogate of the next emoji, so it is
    // shortened by one char.
    writer.WriteHtmlLiteral(context, maxStringLiteralLength: 6, family + family);

    // Assert
    var csharp = context.CodeWriter.GenerateCode();
    Assert.Equal(
$@"WriteLiteral(""👩{Zwj}👩{Zwj}"");
WriteLiteral(""👧{Zwj}👧"");
WriteLiteral(""👩{Zwj}👩{Zwj}"");
WriteLiteral(""👧{Zwj}👧"");
",
        csharp,
        ignoreLineEndingDifferences: true);
}
|
||||
|
||||
[Fact]
|
||||
public void WriteHtmlContent_RendersContentCorrectly()
|
||||
{
|
||||
|
|
|
|||
Loading…
Reference in New Issue