Prevent LOH allocations when constructing large Razor Source Documents. (#1049)

* Prevent LOH allocations when constructing large Razor Source Documents.

* Feedback
This commit is contained in:
Yves57 2017-03-07 01:50:05 +01:00 committed by Ryan Nowak
parent 3159266169
commit dee8d8694f
6 changed files with 388 additions and 116 deletions

View File

@ -30,7 +30,7 @@ namespace Microsoft.AspNetCore.Razor.Evolution
Encoding = encoding;
Filename = filename;
_lines = new LineCollection(this, LineCollection.GetLineStarts(content));
_lines = new DefaultRazorSourceLineCollection(this);
}
public override char this[int position] => _content[position];
@ -72,118 +72,5 @@ namespace Microsoft.AspNetCore.Razor.Evolution
_content.CopyTo(sourceIndex, destination, destinationIndex, count);
}
// Line index for a DefaultRazorSourceDocument. Stores the absolute position at which every
// line starts and answers line-length and position-to-location queries from that table.
private class LineCollection : RazorSourceLineCollection
{
    private readonly DefaultRazorSourceDocument _document;
    private readonly int[] _lineStarts;

    // document:   supplies Length and Filename for the queries below.
    // lineStarts: line-start positions; must be sorted ascending because GetLocation
    //             binary-searches it (GetLineStarts produces such an array).
    public LineCollection(DefaultRazorSourceDocument document, int[] lineStarts)
    {
        _document = document;
        _lineStarts = lineStarts;
    }

    // Number of lines, i.e. the number of recorded line starts.
    public override int Count => _lineStarts.Length;

    // Returns the number of characters in the line at 'index', measured from its start to the
    // start of the next line (so any line-break characters are included).
    // Throws IndexOutOfRangeException when 'index' is not a valid line number.
    public override int GetLineLength(int index)
    {
        if (index < 0 || index >= _lineStarts.Length)
        {
            throw new IndexOutOfRangeException(nameof(index));
        }

        var lineStart = _lineStarts[index];
        var isLastLine = index == _lineStarts.Length - 1;

        // The last line has no successor, so it runs to the end of the document.
        var lineEnd = isLastLine ? _document.Length : _lineStarts[index + 1];
        return lineEnd - lineStart;
    }

    // Maps an absolute character position to a SourceLocation (file, position, line, column).
    // Throws IndexOutOfRangeException when 'position' is outside [0, document length).
    internal override SourceLocation GetLocation(int position)
    {
        if (position < 0 || position >= _document.Length)
        {
            throw new IndexOutOfRangeException(nameof(position));
        }

        var index = Array.BinarySearch<int>(_lineStarts, position);
        if (index >= 0)
        {
            // We have an exact match for the start of a line.
            Debug.Assert(_lineStarts[index] == position);
            return new SourceLocation(_document.Filename, position, index, characterIndex: 0);
        }

        // Index is the complement of the line *after* the one we want, because BinarySearch tells
        // us where we'd put position *if* it were the start of a line.
        index = (~index) - 1;
        if (index == -1)
        {
            // There's no preceding line, so it's based on the start of the string.
            return new SourceLocation(_document.Filename, position, 0, position);
        }

        var characterIndex = position - _lineStarts[index];
        return new SourceLocation(_document.Filename, position, index, characterIndex);
    }

    // Computes the start position of every line in 'text'.
    // Recognized line breaks: CR LF (counted once), lone CR, lone LF, U+0085, U+2028, U+2029.
    // A line break in the very last character position does not start a new line.
    public static int[] GetLineStarts(string text)
    {
        // We always consider a document to have at least a 0th line, even if it's empty.
        var starts = new List<int> { 0 };

        var unprocessedCR = false;
        var lastIndex = text.Length - 1;

        // Stop before the last character because we don't care if there was a linebreak there.
        for (var i = 0; i < lastIndex; i++)
        {
            var isLineBreak = false;

            switch (text[i])
            {
                case '\r':
                    if (unprocessedCR)
                    {
                        // CR CR: the previous CR was a complete line break on its own, so this
                        // CR is the first character of a new line.
                        starts.Add(i);
                    }

                    unprocessedCR = true;
                    continue;

                case '\n':
                    // Either a lone LF or the tail of a CR LF pair; both are one line break.
                    unprocessedCR = false;
                    isLineBreak = true;
                    break;

                case '\u0085':
                case '\u2028':
                case '\u2029':
                    isLineBreak = true;
                    break;
            }

            if (unprocessedCR)
            {
                // If we get here it means that we had a CR followed by something other than an LF.
                // Add the CR as a line break.
                starts.Add(i);
                unprocessedCR = false;
            }

            if (isLineBreak)
            {
                starts.Add(i + 1);
            }
        }

        // The loop never looks at the final character, so a CR immediately before it is still
        // pending here. It is a line break unless the two characters form a trailing CR LF.
        if (unprocessedCR && text[lastIndex] != '\n')
        {
            starts.Add(lastIndex);
        }

        return starts.ToArray();
    }
}
}
}

View File

@ -0,0 +1,123 @@
// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
using System;
using System.Collections.Generic;
using System.Diagnostics;
namespace Microsoft.AspNetCore.Razor.Evolution
{
internal class DefaultRazorSourceLineCollection : RazorSourceLineCollection
{
private readonly RazorSourceDocument _document;
private readonly int[] _lineStarts;
public DefaultRazorSourceLineCollection(RazorSourceDocument document)
{
_document = document;
_lineStarts = GetLineStarts();
}
public override int Count => _lineStarts.Length;
public override int GetLineLength(int index)
{
if (index < 0 || index >= _lineStarts.Length)
{
throw new IndexOutOfRangeException(nameof(index));
}
if (index == _lineStarts.Length - 1)
{
// Last line is special.
return _document.Length - _lineStarts[index];
}
return _lineStarts[index + 1] - _lineStarts[index];
}
internal override SourceLocation GetLocation(int position)
{
if (position < 0 || position >= _document.Length)
{
throw new IndexOutOfRangeException(nameof(position));
}
var index = Array.BinarySearch<int>(_lineStarts, position);
if (index >= 0)
{
// We have an exact match for the start of a line.
Debug.Assert(_lineStarts[index] == position);
return new SourceLocation(_document.Filename, position, index, characterIndex: 0);
}
// Index is the complement of the line *after* the one we want, because BinarySearch tells
// us where we'd put position *if* it were the start of a line.
index = (~index) - 1;
if (index == -1)
{
// There's no preceding line, so it's based on the start of the string
return new SourceLocation(_document.Filename, position, 0, position);
}
else
{
var characterIndex = position - _lineStarts[index];
return new SourceLocation(_document.Filename, position, index, characterIndex);
}
}
private int[] GetLineStarts()
{
var starts = new List<int>();
// We always consider a document to have at least a 0th line, even if it's empty.
starts.Add(0);
var unprocessedCR = false;
// Length - 1 because we don't care if there was a linebreak as the last character.
var length = _document.Length;
for (var i = 0; i < length - 1; i++)
{
var c = _document[i];
var isLineBreak = false;
switch (c)
{
case '\r':
unprocessedCR = true;
continue;
case '\n':
unprocessedCR = false;
isLineBreak = true;
break;
case '\u0085':
case '\u2028':
case '\u2029':
isLineBreak = true;
break;
}
if (unprocessedCR)
{
// If we get here it means that we had a CR followed by something other than an LF.
// Add the CR as a line break.
starts.Add(i);
unprocessedCR = false;
}
if (isLineBreak)
{
starts.Add(i + 1);
}
}
return starts.ToArray();
}
}
}

View File

@ -0,0 +1,125 @@
// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
namespace Microsoft.AspNetCore.Razor.Evolution
{
internal class LargeTextRazorSourceDocument : RazorSourceDocument
{
private readonly List<char[]> _chunks;
private readonly int _chunkMaxLength;
private readonly RazorSourceLineCollection _lines;
private readonly int _length;
public LargeTextRazorSourceDocument(StreamReader reader, int chunkMaxLength, Encoding encoding, string filename)
{
if (reader == null)
{
throw new ArgumentNullException(nameof(reader));
}
if (encoding == null)
{
throw new ArgumentNullException(nameof(encoding));
}
_chunkMaxLength = chunkMaxLength;
Encoding = encoding;
Filename = filename;
ReadChunks(reader, _chunkMaxLength, out _length, out _chunks);
_lines = new DefaultRazorSourceLineCollection(this);
}
public override char this[int position]
{
get
{
var chunkIndex = position / _chunkMaxLength;
var insideChunkPosition = position % _chunkMaxLength;
return _chunks[chunkIndex][insideChunkPosition];
}
}
public override Encoding Encoding { get; }
public override string Filename { get; }
public override int Length => _length;
public override RazorSourceLineCollection Lines => _lines;
public override void CopyTo(int sourceIndex, char[] destination, int destinationIndex, int count)
{
if (destination == null)
{
throw new ArgumentNullException(nameof(destination));
}
if (sourceIndex < 0)
{
throw new ArgumentOutOfRangeException(nameof(sourceIndex));
}
if (destinationIndex < 0)
{
throw new ArgumentOutOfRangeException(nameof(destinationIndex));
}
if (count < 0 || count > Length - sourceIndex || count > destination.Length - destinationIndex)
{
throw new ArgumentOutOfRangeException(nameof(count));
}
if (count == 0)
{
return;
}
var chunkIndex = sourceIndex / _chunkMaxLength;
var insideChunkPosition = sourceIndex % _chunkMaxLength;
var remaining = count;
var currentDestIndex = destinationIndex;
while (remaining > 0)
{
var toCopy = Math.Min(remaining, _chunkMaxLength - insideChunkPosition);
Array.Copy(_chunks[chunkIndex], insideChunkPosition, destination, currentDestIndex, toCopy);
remaining -= toCopy;
currentDestIndex += toCopy;
chunkIndex++;
insideChunkPosition = 0;
}
}
private static void ReadChunks(StreamReader reader, int chunkMaxLength, out int length, out List<char[]> chunks)
{
length = 0;
chunks = new List<char[]>();
int read;
do
{
var chunk = new char[chunkMaxLength];
read = reader.ReadBlock(chunk, 0, chunkMaxLength);
length += read;
if (read > 0)
{
chunks.Add(chunk);
}
}
while (read == chunkMaxLength);
}
}
}

View File

@ -9,6 +9,8 @@ namespace Microsoft.AspNetCore.Razor.Evolution
{
public abstract class RazorSourceDocument
{
private const int LargeObjectHeapLimitInChars = 40 * 1024; // 40K Unicode chars is 80KB which is less than the large object heap limit.
internal static readonly RazorSourceDocument[] EmptyArray = new RazorSourceDocument[0];
public abstract Encoding Encoding { get; }
@ -75,16 +77,18 @@ namespace Microsoft.AspNetCore.Razor.Evolution
if (streamLength > 0)
{
var bufferSize = Math.Min(streamLength, LargeObjectHeapLimitInChars);
var reader = new StreamReader(
stream,
contentEncoding,
detectEncodingFromByteOrderMarks: true,
bufferSize: streamLength,
bufferSize: bufferSize,
leaveOpen: true);
using (reader)
{
content = reader.ReadToEnd();
reader.Peek(); // Just to populate the encoding
if (encoding == null)
{
@ -97,6 +101,18 @@ namespace Microsoft.AspNetCore.Razor.Evolution
encoding.EncodingName,
reader.CurrentEncoding.EncodingName));
}
if (streamLength > LargeObjectHeapLimitInChars)
{
// If the resulting string would end up on the large object heap, then use LargeTextRazorSourceDocument.
return new LargeTextRazorSourceDocument(
reader,
LargeObjectHeapLimitInChars,
contentEncoding,
filename);
}
content = reader.ReadToEnd();
}
}

View File

@ -0,0 +1,106 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using Xunit;
namespace Microsoft.AspNetCore.Razor.Evolution.Test
{
public class LargeTextRazorSourceDocumentTest
{
private const int ChunkTestLength = 10;
[Theory]
[InlineData(ChunkTestLength - 1)]
[InlineData(ChunkTestLength)]
[InlineData(ChunkTestLength + 1)]
[InlineData(ChunkTestLength * 2 - 1)]
[InlineData(ChunkTestLength * 2)]
[InlineData(ChunkTestLength * 2 + 1)]
public void Indexer_ProvidesCharacterAccessToContent(int contentLength)
{
// Arrange
var content = new char[contentLength];
for (var i = 0; i < contentLength - 1; i++)
{
content[i] = 'a';
}
content[contentLength - 1] = 'b';
var contentString = new string(content);
var stream = TestRazorSourceDocument.CreateStreamContent(new string(content));
var reader = new StreamReader(stream, true);
var document = new LargeTextRazorSourceDocument(reader, ChunkTestLength, Encoding.UTF8, "file.cshtml");
// Act
var output = new char[contentLength];
for (var i = 0; i < document.Length; i++)
{
output[i] = document[i];
}
var outputString = new string(output);
// Assert
Assert.Equal(contentLength, document.Length);
Assert.Equal(contentString, outputString);
}
[Theory]
[InlineData("test.cshtml")]
[InlineData(null)]
public void Filename(string fileName)
{
// Arrange
var stream = TestRazorSourceDocument.CreateStreamContent("abc");
var reader = new StreamReader(stream, true);
// Act
var document = new LargeTextRazorSourceDocument(reader, ChunkTestLength, Encoding.UTF8, fileName);
// Assert
Assert.Equal(fileName, document.Filename);
}
[Fact]
public void Lines()
{
// Arrange
var stream = TestRazorSourceDocument.CreateStreamContent("abc\ndef\nghi");
var reader = new StreamReader(stream, true);
// Act
var document = new LargeTextRazorSourceDocument(reader, ChunkTestLength, Encoding.UTF8, "file.cshtml");
// Assert
Assert.Equal(3, document.Lines.Count);
}
[Theory]
[InlineData("", 0, 0, 0)] // Nothing to copy
[InlineData("a", 0, 100, 1)] // Destination index different from start
[InlineData("j", ChunkTestLength - 1, 0, 1)] // One char just before the chunk limit
[InlineData("k", ChunkTestLength, 0, 1)] // One char one the chunk limit
[InlineData("l", ChunkTestLength + 1, 0, 1)] // One char just after the chunk limit
[InlineData("jk", ChunkTestLength - 1, 0, 2)] // Two char that are on both chunk sides
[InlineData("abcdefghijklmnopqrstuvwxy", 0, 100, 25)] // Everything except the last
[InlineData("abcdefghijklmnopqrstuvwxyz", 0, 0, 26)] // Copy all
[InlineData("xyz", 23, 0, 3)] // The last chars
public void CopyTo(string expected, int sourceIndex, int destinationIndex, int count)
{
// Arrange
var stream = TestRazorSourceDocument.CreateStreamContent("abcdefghijklmnopqrstuvwxyz");
var reader = new StreamReader(stream, true);
var document = new LargeTextRazorSourceDocument(reader, ChunkTestLength, Encoding.UTF8, "file.cshtml");
// Act
var destination = new char[1000];
document.CopyTo(sourceIndex, destination, destinationIndex, count);
// Assert
var copy = new string(destination, destinationIndex, count);
Assert.Equal(expected, copy);
}
}
}

View File

@ -94,5 +94,20 @@ namespace Microsoft.AspNetCore.Razor.Evolution
() => RazorSourceDocument.ReadFrom(content, "file.cshtml", Encoding.UTF8));
Assert.Equal(expectedMessage, exception.Message);
}
[Fact]
public void ReadFrom_LargeContent()
{
// Arrange
var content = TestRazorSourceDocument.CreateStreamContent(new string('a', 100000));
// Act
var document = RazorSourceDocument.ReadFrom(content, "file.cshtml");
// Assert
Assert.IsType<LargeTextRazorSourceDocument>(document);
Assert.Equal("file.cshtml", document.Filename);
Assert.Same(Encoding.UTF8, document.Encoding);
}
}
}