From 1089eecbfccfdd54dd8840b978b534bdc4379fbc Mon Sep 17 00:00:00 2001 From: Steve Sanderson Date: Wed, 22 Aug 2018 14:52:35 +0100 Subject: [PATCH] In RenderBatchWriter, deduplicate strings only when safe to do so (#1340) We allow deduplication of HTML element and attribute names, plus whitespace text nodes / attribute values. --- .../Circuits/RenderBatchWriter.cs | 38 ++++++--- .../Circuits/RenderBatchWriterTest.cs | 79 ++++++++++++++++--- 2 files changed, 94 insertions(+), 23 deletions(-) diff --git a/src/Microsoft.AspNetCore.Blazor.Server/Circuits/RenderBatchWriter.cs b/src/Microsoft.AspNetCore.Blazor.Server/Circuits/RenderBatchWriter.cs index 9e5d36e178..28c8d9b482 100644 --- a/src/Microsoft.AspNetCore.Blazor.Server/Circuits/RenderBatchWriter.cs +++ b/src/Microsoft.AspNetCore.Blazor.Server/Circuits/RenderBatchWriter.cs @@ -34,11 +34,13 @@ namespace Microsoft.AspNetCore.Blazor.Server.Circuits internal class RenderBatchWriter : IDisposable { private readonly List _strings; + private readonly Dictionary _deduplicatedStringIndices; private readonly BinaryWriter _binaryWriter; public RenderBatchWriter(Stream output, bool leaveOpen) { _strings = new List(); + _deduplicatedStringIndices = new Dictionary(); _binaryWriter = new BinaryWriter(output, Encoding.UTF8, leaveOpen); } @@ -104,7 +106,7 @@ namespace Microsoft.AspNetCore.Blazor.Server.Circuits _binaryWriter.Write((int)edit.Type); _binaryWriter.Write(edit.SiblingIndex); _binaryWriter.Write(edit.ReferenceFrameIndex); - WriteString(edit.RemovedAttributeName); + WriteString(edit.RemovedAttributeName, allowDeduplication: true); } int Write(in ArrayRange frames) @@ -137,7 +139,7 @@ namespace Microsoft.AspNetCore.Blazor.Server.Circuits switch (frame.FrameType) { case RenderTreeFrameType.Attribute: - WriteString(frame.AttributeName); + WriteString(frame.AttributeName, allowDeduplication: true); if (frame.AttributeValue is bool boolValue) { // Encoding the bool as either "" or null is pretty odd, but avoids @@ -147,11 +149,12 @@ namespace Microsoft.AspNetCore.Blazor.Server.Circuits // or something else, we'll need a different encoding mechanism. Since there // would never be more than (say) 2^28 (268 million) distinct string table // entries, we could use the first 4 bits to encode the value type. - WriteString(boolValue ? string.Empty : null); + WriteString(boolValue ? string.Empty : null, allowDeduplication: true); } else { - WriteString(frame.AttributeValue as string); + var attributeValueString = frame.AttributeValue as string; + WriteString(attributeValueString, allowDeduplication: string.IsNullOrEmpty(attributeValueString)); } _binaryWriter.Write(frame.AttributeEventHandlerId); break; @@ -168,11 +171,11 @@ namespace Microsoft.AspNetCore.Blazor.Server.Circuits break; case RenderTreeFrameType.Element: _binaryWriter.Write(frame.ElementSubtreeLength); - WriteString(frame.ElementName); + WriteString(frame.ElementName, allowDeduplication: true); WritePadding(_binaryWriter, 4); break; case RenderTreeFrameType.ElementReferenceCapture: - WriteString(frame.ElementReferenceCaptureId); + WriteString(frame.ElementReferenceCaptureId, allowDeduplication: false); WritePadding(_binaryWriter, 8); break; case RenderTreeFrameType.Region: @@ -180,11 +183,13 @@ namespace Microsoft.AspNetCore.Blazor.Server.Circuits WritePadding(_binaryWriter, 8); break; case RenderTreeFrameType.Text: - WriteString(frame.TextContent); + WriteString( + frame.TextContent, + allowDeduplication: string.IsNullOrWhiteSpace(frame.TextContent)); WritePadding(_binaryWriter, 8); break; case RenderTreeFrameType.Markup: - WriteString(frame.MarkupContent); + WriteString(frame.MarkupContent, allowDeduplication: false); WritePadding(_binaryWriter, 8); break; default: @@ -207,7 +212,7 @@ namespace Microsoft.AspNetCore.Blazor.Server.Circuits return startPos; } - void WriteString(string value) + void WriteString(string value, bool allowDeduplication) { if (value == null) { @@ -215,9 +220,20 @@ namespace Microsoft.AspNetCore.Blazor.Server.Circuits } else { - var stringIndex = _strings.Count; + int stringIndex; + + if (!allowDeduplication || !_deduplicatedStringIndices.TryGetValue(value, out stringIndex)) + { + stringIndex = _strings.Count; + _strings.Add(value); + + if (allowDeduplication) + { + _deduplicatedStringIndices.Add(value, stringIndex); + } + } + _binaryWriter.Write(stringIndex); - _strings.Add(value); } } diff --git a/test/Microsoft.AspnetCore.Blazor.Server.Test/Circuits/RenderBatchWriterTest.cs b/test/Microsoft.AspnetCore.Blazor.Server.Test/Circuits/RenderBatchWriterTest.cs index 636b67b311..6c0081f21e 100644 --- a/test/Microsoft.AspnetCore.Blazor.Server.Test/Circuits/RenderBatchWriterTest.cs +++ b/test/Microsoft.AspnetCore.Blazor.Server.Test/Circuits/RenderBatchWriterTest.cs @@ -6,6 +6,7 @@ using Microsoft.AspNetCore.Blazor.Rendering; using Microsoft.AspNetCore.Blazor.RenderTree; using Microsoft.AspNetCore.Blazor.Server.Circuits; using System; +using System.Collections.Generic; using System.IO; using System.Text; using Xunit; @@ -147,6 +148,7 @@ namespace Microsoft.AspNetCore.Blazor.Server RenderTreeEdit.StepIn(107), RenderTreeEdit.StepOut(), RenderTreeEdit.UpdateMarkup(108, 109), + RenderTreeEdit.RemoveAttribute(110, "Some removed attribute"), // To test deduplication }; var bytes = Serialize(new RenderBatch( new ArrayRange(new[] @@ -166,7 +168,7 @@ namespace Microsoft.AspNetCore.Blazor.Server AssertBinaryContents(bytes, 0, 123, // Component ID for diff 0 - 8, // diff[0].Edits.Count + 9, // diff[0].Edits.Count RenderTreeEditType.PrependFrame, 456, 789, NullStringMarker, RenderTreeEditType.RemoveFrame, 101, 0, NullStringMarker, RenderTreeEditType.SetAttribute, 102, 103, NullStringMarker, @@ -174,8 +176,12 @@ namespace Microsoft.AspNetCore.Blazor.Server RenderTreeEditType.UpdateText, 105, 106, NullStringMarker, RenderTreeEditType.StepIn, 107, 0, NullStringMarker, RenderTreeEditType.StepOut, 0, 0, NullStringMarker, - RenderTreeEditType.UpdateMarkup, 108, 109, NullStringMarker + RenderTreeEditType.UpdateMarkup, 108, 109, NullStringMarker, + RenderTreeEditType.RemoveAttribute, 110, 0, "Some removed attribute" ); + + // We can deduplicate attribute names + Assert.Equal(new[] { "Some removed attribute" }, ReadStringTable(bytes)); } [Fact] @@ -201,14 +207,23 @@ namespace Microsoft.AspNetCore.Blazor.Server .WithRegionSubtreeLength(1234), RenderTreeFrame.Text(131, "Some text"), RenderTreeFrame.Markup(132, "Some markup"), - }, 10), + RenderTreeFrame.Text(133, "\n\t "), + + // Testing deduplication + RenderTreeFrame.Attribute(134, "Attribute with string value", "String value"), + RenderTreeFrame.Element(135, "Some element") // Will be dedupliated + .WithElementSubtreeLength(999), + RenderTreeFrame.Text(136, "Some text"), // Will not be deduplicated + RenderTreeFrame.Markup(137, "Some markup"), // Will not be deduplicated + RenderTreeFrame.Text(138, "\n\t "), // Will be deduplicated + }, 16), default, default)); // Assert var referenceFramesStartIndex = ReadInt(bytes, bytes.Length - 16); AssertBinaryContents(bytes, referenceFramesStartIndex, - 10, // Number of frames + 16, // Number of frames RenderTreeFrameType.Attribute, "Attribute with string value", "String value", 0, RenderTreeFrameType.Attribute, "Attribute with nonstring value", NullStringMarker, 0, RenderTreeFrameType.Attribute, "Attribute with delegate value", NullStringMarker, 789, @@ -218,8 +233,30 @@ namespace Microsoft.AspNetCore.Blazor.Server RenderTreeFrameType.ElementReferenceCapture, "my unique ID", 0, 0, RenderTreeFrameType.Region, 1234, 0, 0, RenderTreeFrameType.Text, "Some text", 0, 0, - RenderTreeFrameType.Markup, "Some markup", 0, 0 + RenderTreeFrameType.Markup, "Some markup", 0, 0, + RenderTreeFrameType.Text, "\n\t ", 0, 0, + RenderTreeFrameType.Attribute, "Attribute with string value", "String value", 0, + RenderTreeFrameType.Element, 999, "Some element", 0, + RenderTreeFrameType.Text, "Some text", 0, 0, + RenderTreeFrameType.Markup, "Some markup", 0, 0, + RenderTreeFrameType.Text, "\n\t ", 0, 0 ); + + Assert.Equal(new[] + { + "Attribute with string value", + "String value", + "Attribute with nonstring value", + "Attribute with delegate value", + "Some element", + "my unique ID", + "Some text", + "Some markup", + "\n\t ", + "String value", + "Some text", + "Some markup", + }, ReadStringTable(bytes)); } private Span Serialize(RenderBatch renderBatch) @@ -232,12 +269,34 @@ namespace Microsoft.AspNetCore.Blazor.Server } } - static void AssertBinaryContents(Span data, int startIndex, params object[] entries) + static string[] ReadStringTable(Span data) { var bytes = data.ToArray(); - // The string table position is given by the final int + // The string table position is given by the final int, and continues + // until we get to the final set of top-level indices var stringTableStartPosition = BitConverter.ToInt32(bytes, bytes.Length - 4); + var stringTableEndPositionExcl = bytes.Length - 20; + + var result = new List(); + for (var entryPosition = stringTableStartPosition; + entryPosition < stringTableEndPositionExcl; + entryPosition += 4) + { + // The string table entries are all length-prefixed UTF8 blobs + var tableEntryPos = BitConverter.ToInt32(bytes, entryPosition); + var length = (int)ReadUnsignedLEB128(bytes, tableEntryPos, out var numLEB128Bytes); + var value = Encoding.UTF8.GetString(bytes, tableEntryPos + numLEB128Bytes, length); + result.Add(value); + } + + return result.ToArray(); + } + + static void AssertBinaryContents(Span data, int startIndex, params object[] entries) + { + var bytes = data.ToArray(); + var stringTableEntries = ReadStringTable(data); using (var ms = new MemoryStream(bytes)) using (var reader = new BinaryReader(ms)) @@ -267,11 +326,7 @@ namespace Microsoft.AspNetCore.Blazor.Server } else { - // The string table entries are all length-prefixed UTF8 blobs - var tableEntryPos = BitConverter.ToInt32(bytes, stringTableStartPosition + 4 * indexIntoStringTable); - var length = (int)ReadUnsignedLEB128(bytes, tableEntryPos, out var numLEB128Bytes); - var value = Encoding.UTF8.GetString(bytes, tableEntryPos + numLEB128Bytes, length); - Assert.Equal(expectedString, value); + Assert.Equal(expectedString, stringTableEntries[indexIntoStringTable]); } } else