From dee8d8694f7e91ee5827260bb10bd624040ba54d Mon Sep 17 00:00:00 2001 From: Yves57 Date: Tue, 7 Mar 2017 01:50:05 +0100 Subject: [PATCH] Prevent LOH allocations when constructing large Razor Source Documents. (#1049) * Prevent LOH allocations when constructing large Razor Source Documents. * Feedback --- .../DefaultRazorSourceDocument.cs | 115 +--------------- .../DefaultRazorSourceLineCollection.cs | 123 +++++++++++++++++ .../LargeTextRazorSourceDocument.cs | 125 ++++++++++++++++++ .../RazorSourceDocument.cs | 20 ++- .../LargeTextRazorSourceDocumentTest.cs | 106 +++++++++++++++ .../RazorSourceDocumentTest.cs | 15 +++ 6 files changed, 388 insertions(+), 116 deletions(-) create mode 100644 src/Microsoft.AspNetCore.Razor.Evolution/DefaultRazorSourceLineCollection.cs create mode 100644 src/Microsoft.AspNetCore.Razor.Evolution/LargeTextRazorSourceDocument.cs create mode 100644 test/Microsoft.AspNetCore.Razor.Evolution.Test/LargeTextRazorSourceDocumentTest.cs diff --git a/src/Microsoft.AspNetCore.Razor.Evolution/DefaultRazorSourceDocument.cs b/src/Microsoft.AspNetCore.Razor.Evolution/DefaultRazorSourceDocument.cs index 01b5866d92..353ccb222b 100644 --- a/src/Microsoft.AspNetCore.Razor.Evolution/DefaultRazorSourceDocument.cs +++ b/src/Microsoft.AspNetCore.Razor.Evolution/DefaultRazorSourceDocument.cs @@ -30,7 +30,7 @@ namespace Microsoft.AspNetCore.Razor.Evolution Encoding = encoding; Filename = filename; - _lines = new LineCollection(this, LineCollection.GetLineStarts(content)); + _lines = new DefaultRazorSourceLineCollection(this); } public override char this[int position] => _content[position]; @@ -72,118 +72,5 @@ namespace Microsoft.AspNetCore.Razor.Evolution _content.CopyTo(sourceIndex, destination, destinationIndex, count); } - - private class LineCollection : RazorSourceLineCollection - { - private readonly DefaultRazorSourceDocument _document; - private readonly int[] _lineStarts; - - public LineCollection(DefaultRazorSourceDocument document, int[] lineStarts) - { - _document = document; - _lineStarts = lineStarts; - } - - public override int Count => _lineStarts.Length; - - public override int GetLineLength(int index) - { - if (index < 0 || index >= _lineStarts.Length) - { - throw new IndexOutOfRangeException(nameof(index)); - } - - if (index == _lineStarts.Length - 1) - { - // Last line is special. - return _document.Length - _lineStarts[index]; - } - - return _lineStarts[index + 1] - _lineStarts[index]; - } - - internal override SourceLocation GetLocation(int position) - { - if (position < 0 || position >= _document.Length) - { - throw new IndexOutOfRangeException(nameof(position)); - } - - var index = Array.BinarySearch(_lineStarts, position); - if (index >= 0) - { - // We have an exact match for the start of a line. - Debug.Assert(_lineStarts[index] == position); - - return new SourceLocation(_document.Filename, position, index, characterIndex: 0); - } - - - // Index is the complement of the line *after* the one we want, because BinarySearch tells - // us where we'd put position *if* it were the start of a line. - index = (~index) - 1; - if (index == -1) - { - // There's no preceding line, so it's based on the start of the string - return new SourceLocation(_document.Filename, position, 0, position); - } - else - { - var characterIndex = position - _lineStarts[index]; - return new SourceLocation(_document.Filename, position, index, characterIndex); - } - } - - public static int[] GetLineStarts(string text) - { - var starts = new List(); - - // We always consider a document to have at least a 0th line, even if it's empty. - starts.Add(0); - - var unprocessedCR = false; - - // Length - 1 because we don't care if there was a linebreak as the last character. - for (var i = 0; i < text.Length - 1; i++) - { - var c = text[i]; - var isLineBreak = false; - - switch (c) - { - case '\r': - unprocessedCR = true; - continue; - - case '\n': - unprocessedCR = false; - isLineBreak = true; - break; - - case '\u0085': - case '\u2028': - case '\u2029': - isLineBreak = true; - break; - - } - - if (unprocessedCR) - { - // If we get here it means that we had a CR followed by something other than an LF. - // Add the CR as a line break. - starts.Add(i); - unprocessedCR = false; - } - - if (isLineBreak) - { - starts.Add(i + 1); - } - } - - return starts.ToArray(); - } - } } } diff --git a/src/Microsoft.AspNetCore.Razor.Evolution/DefaultRazorSourceLineCollection.cs b/src/Microsoft.AspNetCore.Razor.Evolution/DefaultRazorSourceLineCollection.cs new file mode 100644 index 0000000000..53e8c43f66 --- /dev/null +++ b/src/Microsoft.AspNetCore.Razor.Evolution/DefaultRazorSourceLineCollection.cs @@ -0,0 +1,123 @@ +// Copyright (c) .NET Foundation. All rights reserved. +// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. + +using System; +using System.Collections.Generic; +using System.Diagnostics; + +namespace Microsoft.AspNetCore.Razor.Evolution +{ + internal class DefaultRazorSourceLineCollection : RazorSourceLineCollection + { + private readonly RazorSourceDocument _document; + private readonly int[] _lineStarts; + + public DefaultRazorSourceLineCollection(RazorSourceDocument document) + { + _document = document; + _lineStarts = GetLineStarts(); + } + + public override int Count => _lineStarts.Length; + + public override int GetLineLength(int index) + { + if (index < 0 || index >= _lineStarts.Length) + { + throw new IndexOutOfRangeException(nameof(index)); + } + + if (index == _lineStarts.Length - 1) + { + // Last line is special. + return _document.Length - _lineStarts[index]; + } + + return _lineStarts[index + 1] - _lineStarts[index]; + } + + internal override SourceLocation GetLocation(int position) + { + if (position < 0 || position >= _document.Length) + { + throw new IndexOutOfRangeException(nameof(position)); + } + + var index = Array.BinarySearch(_lineStarts, position); + if (index >= 0) + { + // We have an exact match for the start of a line. + Debug.Assert(_lineStarts[index] == position); + + return new SourceLocation(_document.Filename, position, index, characterIndex: 0); + } + + + // Index is the complement of the line *after* the one we want, because BinarySearch tells + // us where we'd put position *if* it were the start of a line. + index = (~index) - 1; + if (index == -1) + { + // There's no preceding line, so it's based on the start of the string + return new SourceLocation(_document.Filename, position, 0, position); + } + else + { + var characterIndex = position - _lineStarts[index]; + return new SourceLocation(_document.Filename, position, index, characterIndex); + } + } + + private int[] GetLineStarts() + { + var starts = new List(); + + // We always consider a document to have at least a 0th line, even if it's empty. + starts.Add(0); + + var unprocessedCR = false; + + // Length - 1 because we don't care if there was a linebreak as the last character. + var length = _document.Length; + for (var i = 0; i < length - 1; i++) + { + var c = _document[i]; + var isLineBreak = false; + + switch (c) + { + case '\r': + unprocessedCR = true; + continue; + + case '\n': + unprocessedCR = false; + isLineBreak = true; + break; + + case '\u0085': + case '\u2028': + case '\u2029': + isLineBreak = true; + break; + + } + + if (unprocessedCR) + { + // If we get here it means that we had a CR followed by something other than an LF. + // Add the CR as a line break. + starts.Add(i); + unprocessedCR = false; + } + + if (isLineBreak) + { + starts.Add(i + 1); + } + } + + return starts.ToArray(); + } + } +} diff --git a/src/Microsoft.AspNetCore.Razor.Evolution/LargeTextRazorSourceDocument.cs b/src/Microsoft.AspNetCore.Razor.Evolution/LargeTextRazorSourceDocument.cs new file mode 100644 index 0000000000..5d9b6eb9d2 --- /dev/null +++ b/src/Microsoft.AspNetCore.Razor.Evolution/LargeTextRazorSourceDocument.cs @@ -0,0 +1,125 @@ +// Copyright (c) .NET Foundation. All rights reserved. +// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. + +using System; +using System.Collections.Generic; +using System.IO; +using System.Text; + +namespace Microsoft.AspNetCore.Razor.Evolution +{ + internal class LargeTextRazorSourceDocument : RazorSourceDocument + { + private readonly List _chunks; + + private readonly int _chunkMaxLength; + + private readonly RazorSourceLineCollection _lines; + + private readonly int _length; + + public LargeTextRazorSourceDocument(StreamReader reader, int chunkMaxLength, Encoding encoding, string filename) + { + if (reader == null) + { + throw new ArgumentNullException(nameof(reader)); + } + + if (encoding == null) + { + throw new ArgumentNullException(nameof(encoding)); + } + + _chunkMaxLength = chunkMaxLength; + Encoding = encoding; + Filename = filename; + + ReadChunks(reader, _chunkMaxLength, out _length, out _chunks); + _lines = new DefaultRazorSourceLineCollection(this); + } + + public override char this[int position] + { + get + { + var chunkIndex = position / _chunkMaxLength; + var insideChunkPosition = position % _chunkMaxLength; + + return _chunks[chunkIndex][insideChunkPosition]; + } + } + + public override Encoding Encoding { get; } + + public override string Filename { get; } + + public override int Length => _length; + + public override RazorSourceLineCollection Lines => _lines; + + public override void CopyTo(int sourceIndex, char[] destination, int destinationIndex, int count) + { + if (destination == null) + { + throw new ArgumentNullException(nameof(destination)); + } + + if (sourceIndex < 0) + { + throw new ArgumentOutOfRangeException(nameof(sourceIndex)); + } + + if (destinationIndex < 0) + { + throw new ArgumentOutOfRangeException(nameof(destinationIndex)); + } + + if (count < 0 || count > Length - sourceIndex || count > destination.Length - destinationIndex) + { + throw new ArgumentOutOfRangeException(nameof(count)); + } + + if (count == 0) + { + return; + } + + var chunkIndex = sourceIndex / _chunkMaxLength; + var insideChunkPosition = sourceIndex % _chunkMaxLength; + var remaining = count; + var currentDestIndex = destinationIndex; + + while (remaining > 0) + { + var toCopy = Math.Min(remaining, _chunkMaxLength - insideChunkPosition); + Array.Copy(_chunks[chunkIndex], insideChunkPosition, destination, currentDestIndex, toCopy); + + remaining -= toCopy; + currentDestIndex += toCopy; + chunkIndex++; + insideChunkPosition = 0; + } + } + + private static void ReadChunks(StreamReader reader, int chunkMaxLength, out int length, out List chunks) + { + length = 0; + chunks = new List(); + + int read; + do + { + var chunk = new char[chunkMaxLength]; + read = reader.ReadBlock(chunk, 0, chunkMaxLength); + + length += read; + + if (read > 0) + { + chunks.Add(chunk); + } + } + while (read == chunkMaxLength); + } + } +} diff --git a/src/Microsoft.AspNetCore.Razor.Evolution/RazorSourceDocument.cs b/src/Microsoft.AspNetCore.Razor.Evolution/RazorSourceDocument.cs index 9d7505b657..d1e1565937 100644 --- a/src/Microsoft.AspNetCore.Razor.Evolution/RazorSourceDocument.cs +++ b/src/Microsoft.AspNetCore.Razor.Evolution/RazorSourceDocument.cs @@ -9,6 +9,8 @@ namespace Microsoft.AspNetCore.Razor.Evolution { public abstract class RazorSourceDocument { + private const int LargeObjectHeapLimitInChars = 40 * 1024; // 40K Unicode chars is 80KB which is less than the large object heap limit. + internal static readonly RazorSourceDocument[] EmptyArray = new RazorSourceDocument[0]; public abstract Encoding Encoding { get; } @@ -75,16 +77,18 @@ namespace Microsoft.AspNetCore.Razor.Evolution if (streamLength > 0) { + var bufferSize = Math.Min(streamLength, LargeObjectHeapLimitInChars); + var reader = new StreamReader( stream, contentEncoding, detectEncodingFromByteOrderMarks: true, - bufferSize: streamLength, + bufferSize: bufferSize, leaveOpen: true); using (reader) { - content = reader.ReadToEnd(); + reader.Peek(); // Just to populate the encoding if (encoding == null) { @@ -97,6 +101,18 @@ namespace Microsoft.AspNetCore.Razor.Evolution encoding.EncodingName, reader.CurrentEncoding.EncodingName)); } + + if (streamLength > LargeObjectHeapLimitInChars) + { + // If the resulting string would end up on the large object heap, then use LargeTextRazorSourceDocument. + return new LargeTextRazorSourceDocument( + reader, + LargeObjectHeapLimitInChars, + contentEncoding, + filename); + } + + content = reader.ReadToEnd(); } } diff --git a/test/Microsoft.AspNetCore.Razor.Evolution.Test/LargeTextRazorSourceDocumentTest.cs b/test/Microsoft.AspNetCore.Razor.Evolution.Test/LargeTextRazorSourceDocumentTest.cs new file mode 100644 index 0000000000..71778bf358 --- /dev/null +++ b/test/Microsoft.AspNetCore.Razor.Evolution.Test/LargeTextRazorSourceDocumentTest.cs @@ -0,0 +1,106 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Text; +using Xunit; + +namespace Microsoft.AspNetCore.Razor.Evolution.Test +{ + public class LargeTextRazorSourceDocumentTest + { + private const int ChunkTestLength = 10; + + [Theory] + [InlineData(ChunkTestLength - 1)] + [InlineData(ChunkTestLength)] + [InlineData(ChunkTestLength + 1)] + [InlineData(ChunkTestLength * 2 - 1)] + [InlineData(ChunkTestLength * 2)] + [InlineData(ChunkTestLength * 2 + 1)] + public void Indexer_ProvidesCharacterAccessToContent(int contentLength) + { + // Arrange + var content = new char[contentLength]; + + for (var i = 0; i < contentLength - 1; i++) + { + content[i] = 'a'; + } + content[contentLength - 1] = 'b'; + var contentString = new string(content); + + var stream = TestRazorSourceDocument.CreateStreamContent(new string(content)); + var reader = new StreamReader(stream, true); + var document = new LargeTextRazorSourceDocument(reader, ChunkTestLength, Encoding.UTF8, "file.cshtml"); + + // Act + var output = new char[contentLength]; + for (var i = 0; i < document.Length; i++) + { + output[i] = document[i]; + } + var outputString = new string(output); + + // Assert + Assert.Equal(contentLength, document.Length); + Assert.Equal(contentString, outputString); + } + + [Theory] + [InlineData("test.cshtml")] + [InlineData(null)] + public void Filename(string fileName) + { + // Arrange + var stream = TestRazorSourceDocument.CreateStreamContent("abc"); + var reader = new StreamReader(stream, true); + + // Act + var document = new LargeTextRazorSourceDocument(reader, ChunkTestLength, Encoding.UTF8, fileName); + + // Assert + Assert.Equal(fileName, document.Filename); + } + + [Fact] + public void Lines() + { + // Arrange + var stream = TestRazorSourceDocument.CreateStreamContent("abc\ndef\nghi"); + var reader = new StreamReader(stream, true); + + // Act + var document = new LargeTextRazorSourceDocument(reader, ChunkTestLength, Encoding.UTF8, "file.cshtml"); + + // Assert + Assert.Equal(3, document.Lines.Count); + } + + [Theory] + [InlineData("", 0, 0, 0)] // Nothing to copy + [InlineData("a", 0, 100, 1)] // Destination index different from start + [InlineData("j", ChunkTestLength - 1, 0, 1)] // One char just before the chunk limit + [InlineData("k", ChunkTestLength, 0, 1)] // One char one the chunk limit + [InlineData("l", ChunkTestLength + 1, 0, 1)] // One char just after the chunk limit + [InlineData("jk", ChunkTestLength - 1, 0, 2)] // Two char that are on both chunk sides + [InlineData("abcdefghijklmnopqrstuvwxy", 0, 100, 25)] // Everything except the last + [InlineData("abcdefghijklmnopqrstuvwxyz", 0, 0, 26)] // Copy all + [InlineData("xyz", 23, 0, 3)] // The last chars + public void CopyTo(string expected, int sourceIndex, int destinationIndex, int count) + { + // Arrange + var stream = TestRazorSourceDocument.CreateStreamContent("abcdefghijklmnopqrstuvwxyz"); + + var reader = new StreamReader(stream, true); + var document = new LargeTextRazorSourceDocument(reader, ChunkTestLength, Encoding.UTF8, "file.cshtml"); + + // Act + var destination = new char[1000]; + document.CopyTo(sourceIndex, destination, destinationIndex, count); + + // Assert + var copy = new string(destination, destinationIndex, count); + Assert.Equal(expected, copy); + } + } +} diff --git a/test/Microsoft.AspNetCore.Razor.Evolution.Test/RazorSourceDocumentTest.cs b/test/Microsoft.AspNetCore.Razor.Evolution.Test/RazorSourceDocumentTest.cs index 9a702a91da..fc80d816d0 100644 --- a/test/Microsoft.AspNetCore.Razor.Evolution.Test/RazorSourceDocumentTest.cs +++ b/test/Microsoft.AspNetCore.Razor.Evolution.Test/RazorSourceDocumentTest.cs @@ -94,5 +94,20 @@ namespace Microsoft.AspNetCore.Razor.Evolution () => RazorSourceDocument.ReadFrom(content, "file.cshtml", Encoding.UTF8)); Assert.Equal(expectedMessage, exception.Message); } + + [Fact] + public void ReadFrom_LargeContent() + { + // Arrange + var content = TestRazorSourceDocument.CreateStreamContent(new string('a', 100000)); + + // Act + var document = RazorSourceDocument.ReadFrom(content, "file.cshtml"); + + // Assert + Assert.IsType(document); + Assert.Equal("file.cshtml", document.Filename); + Assert.Same(Encoding.UTF8, document.Encoding); + } } }