From dee8d8694f7e91ee5827260bb10bd624040ba54d Mon Sep 17 00:00:00 2001
From: Yves57 <Yves57@users.noreply.github.com>
Date: Tue, 7 Mar 2017 01:50:05 +0100
Subject: [PATCH] Prevent LOH allocations when constructing large Razor Source
 Documents. (#1049)

* Prevent LOH allocations when constructing large Razor Source Documents.

* Feedback
---
 .../DefaultRazorSourceDocument.cs             | 115 +---------------
 .../DefaultRazorSourceLineCollection.cs       | 123 +++++++++++++++++
 .../LargeTextRazorSourceDocument.cs           | 125 ++++++++++++++++++
 .../RazorSourceDocument.cs                    |  20 ++-
 .../LargeTextRazorSourceDocumentTest.cs       | 106 +++++++++++++++
 .../RazorSourceDocumentTest.cs                |  15 +++
 6 files changed, 388 insertions(+), 116 deletions(-)
 create mode 100644 src/Microsoft.AspNetCore.Razor.Evolution/DefaultRazorSourceLineCollection.cs
 create mode 100644 src/Microsoft.AspNetCore.Razor.Evolution/LargeTextRazorSourceDocument.cs
 create mode 100644 test/Microsoft.AspNetCore.Razor.Evolution.Test/LargeTextRazorSourceDocumentTest.cs
diff --git a/src/Microsoft.AspNetCore.Razor.Evolution/DefaultRazorSourceDocument.cs b/src/Microsoft.AspNetCore.Razor.Evolution/DefaultRazorSourceDocument.cs
index 01b5866d92..353ccb222b 100644
--- a/src/Microsoft.AspNetCore.Razor.Evolution/DefaultRazorSourceDocument.cs
+++ b/src/Microsoft.AspNetCore.Razor.Evolution/DefaultRazorSourceDocument.cs
@@ -30,7 +30,7 @@ namespace Microsoft.AspNetCore.Razor.Evolution
             Encoding = encoding;
             Filename = filename;
 
-            _lines = new LineCollection(this, LineCollection.GetLineStarts(content));
+            _lines = new DefaultRazorSourceLineCollection(this);
         }
 
         public override char this[int position] => _content[position];
@@ -72,118 +72,5 @@ namespace Microsoft.AspNetCore.Razor.Evolution
 
             _content.CopyTo(sourceIndex, destination, destinationIndex, count);
         }
-
-        private class LineCollection : RazorSourceLineCollection
-        {
-            private readonly DefaultRazorSourceDocument _document;
-            private readonly int[] _lineStarts;
-
-            public LineCollection(DefaultRazorSourceDocument document, int[] lineStarts)
-            {
-                _document = document;
-                _lineStarts = lineStarts;
-            }
-
-            public override int Count => _lineStarts.Length;
-
-            public override int GetLineLength(int index)
-            {
-                if (index < 0 || index >= _lineStarts.Length)
-                {
-                    throw new IndexOutOfRangeException(nameof(index));
-                }
-
-                if (index == _lineStarts.Length - 1)
-                {
-                    // Last line is special.
-                    return _document.Length - _lineStarts[index];
-                }
-
-                return _lineStarts[index + 1] - _lineStarts[index];
-            }
-
-            internal override SourceLocation GetLocation(int position)
-            {
-                if (position < 0 || position >= _document.Length)
-                {
-                    throw new IndexOutOfRangeException(nameof(position));
-                }
-
-                var index = Array.BinarySearch<int>(_lineStarts, position);
-                if (index >= 0)
-                {
-                    // We have an exact match for the start of a line.
-                    Debug.Assert(_lineStarts[index] == position);
-
-                    return new SourceLocation(_document.Filename, position, index, characterIndex: 0);
-                }
-
-
-                // Index is the complement of the line *after* the one we want, because BinarySearch tells
-                // us where we'd put position *if* it were the start of a line.
-                index = (~index) - 1;
-                if (index == -1)
-                {
-                    // There's no preceding line, so it's based on the start of the string
-                    return new SourceLocation(_document.Filename, position, 0, position);
-                }
-                else
-                {
-                    var characterIndex = position - _lineStarts[index];
-                    return new SourceLocation(_document.Filename, position, index, characterIndex);
-                }
-            }
-
-            public static int[] GetLineStarts(string text)
-            {
-                var starts = new List<int>();
-
-                // We always consider a document to have at least a 0th line, even if it's empty.
-                starts.Add(0);
-
-                var unprocessedCR = false;
-
-                // Length - 1 because we don't care if there was a linebreak as the last character.
-                for (var i = 0; i < text.Length - 1; i++)
-                {
-                    var c = text[i];
-                    var isLineBreak = false;
-
-                    switch (c)
-                    {
-                        case '\r':
-                            unprocessedCR = true;
-                            continue;
-
-                        case '\n':
-                            unprocessedCR = false;
-                            isLineBreak = true;
-                            break;
-
-                        case '\u0085':
-                        case '\u2028':
-                        case '\u2029':
-                            isLineBreak = true;
-                            break;
-
-                    }
-
-                    if (unprocessedCR)
-                    {
-                        // If we get here it means that we had a CR followed by something other than an LF.
-                        // Add the CR as a line break.
-                        starts.Add(i);
-                        unprocessedCR = false;
-                    }
-
-                    if (isLineBreak)
-                    {
-                        starts.Add(i + 1);
-                    }
-                }
-
-                return starts.ToArray();
-            }
-        }
     }
 }
diff --git a/src/Microsoft.AspNetCore.Razor.Evolution/DefaultRazorSourceLineCollection.cs b/src/Microsoft.AspNetCore.Razor.Evolution/DefaultRazorSourceLineCollection.cs
new file mode 100644
index 0000000000..53e8c43f66
--- /dev/null
+++ b/src/Microsoft.AspNetCore.Razor.Evolution/DefaultRazorSourceLineCollection.cs
@@ -0,0 +1,123 @@
+﻿// Copyright (c) .NET Foundation. All rights reserved.
+// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
+
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+
+namespace Microsoft.AspNetCore.Razor.Evolution
+{
+    internal class DefaultRazorSourceLineCollection : RazorSourceLineCollection
+    {
+        private readonly RazorSourceDocument _document;
+        private readonly int[] _lineStarts;
+
+        public DefaultRazorSourceLineCollection(RazorSourceDocument document)
+        {
+            _document = document;
+            _lineStarts = GetLineStarts();
+        }
+
+        public override int Count => _lineStarts.Length;
+
+        public override int GetLineLength(int index)
+        {
+            if (index < 0 || index >= _lineStarts.Length)
+            {
+                throw new IndexOutOfRangeException(nameof(index));
+            }
+
+            if (index == _lineStarts.Length - 1)
+            {
+                // Last line is special.
+                return _document.Length - _lineStarts[index];
+            }
+
+            return _lineStarts[index + 1] - _lineStarts[index];
+        }
+
+        internal override SourceLocation GetLocation(int position)
+        {
+            if (position < 0 || position >= _document.Length)
+            {
+                throw new IndexOutOfRangeException(nameof(position));
+            }
+
+            var index = Array.BinarySearch<int>(_lineStarts, position);
+            if (index >= 0)
+            {
+                // We have an exact match for the start of a line.
+                Debug.Assert(_lineStarts[index] == position);
+
+                return new SourceLocation(_document.Filename, position, index, characterIndex: 0);
+            }
+
+
+            // Index is the complement of the line *after* the one we want, because BinarySearch tells
+            // us where we'd put position *if* it were the start of a line.
+            index = (~index) - 1;
+            if (index == -1)
+            {
+                // There's no preceding line, so it's based on the start of the string
+                return new SourceLocation(_document.Filename, position, 0, position);
+            }
+            else
+            {
+                var characterIndex = position - _lineStarts[index];
+                return new SourceLocation(_document.Filename, position, index, characterIndex);
+            }
+        }
+
+        private int[] GetLineStarts()
+        {
+            var starts = new List<int>();
+
+            // We always consider a document to have at least a 0th line, even if it's empty.
+            starts.Add(0);
+
+            var unprocessedCR = false;
+
+            // Length - 1 because we don't care if there was a linebreak as the last character.
+            var length = _document.Length;
+            for (var i = 0; i < length - 1; i++)
+            {
+                var c = _document[i];
+                var isLineBreak = false;
+
+                switch (c)
+                {
+                    case '\r':
+                        unprocessedCR = true;
+                        continue;
+
+                    case '\n':
+                        unprocessedCR = false;
+                        isLineBreak = true;
+                        break;
+
+                    case '\u0085':
+                    case '\u2028':
+                    case '\u2029':
+                        isLineBreak = true;
+                        break;
+
+                }
+
+                if (unprocessedCR)
+                {
+                    // If we get here it means that we had a CR followed by something other than an LF.
+                    // Add the CR as a line break.
+                    starts.Add(i);
+                    unprocessedCR = false;
+                }
+
+                if (isLineBreak)
+                {
+                    starts.Add(i + 1);
+                }
+            }
+
+            return starts.ToArray();
+        }
+    }
+}
diff --git a/src/Microsoft.AspNetCore.Razor.Evolution/LargeTextRazorSourceDocument.cs b/src/Microsoft.AspNetCore.Razor.Evolution/LargeTextRazorSourceDocument.cs
new file mode 100644
index 0000000000..5d9b6eb9d2
--- /dev/null
+++ b/src/Microsoft.AspNetCore.Razor.Evolution/LargeTextRazorSourceDocument.cs
@@ -0,0 +1,125 @@
+﻿// Copyright (c) .NET Foundation. All rights reserved.
+// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
+
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Text;
+
+namespace Microsoft.AspNetCore.Razor.Evolution
+{
+    internal class LargeTextRazorSourceDocument : RazorSourceDocument
+    {
+        private readonly List<char[]> _chunks;
+
+        private readonly int _chunkMaxLength;
+
+        private readonly RazorSourceLineCollection _lines;
+
+        private readonly int _length;
+
+        public LargeTextRazorSourceDocument(StreamReader reader, int chunkMaxLength, Encoding encoding, string filename)
+        {
+            if (reader == null)
+            {
+                throw new ArgumentNullException(nameof(reader));
+            }
+
+            if (encoding == null)
+            {
+                throw new ArgumentNullException(nameof(encoding));
+            }
+
+            _chunkMaxLength = chunkMaxLength;
+            Encoding = encoding;
+            Filename = filename;
+
+            ReadChunks(reader, _chunkMaxLength, out _length, out _chunks);
+            _lines = new DefaultRazorSourceLineCollection(this);
+        }
+
+        public override char this[int position]
+        {
+            get
+            {
+                var chunkIndex = position / _chunkMaxLength;
+                var insideChunkPosition = position % _chunkMaxLength;
+
+                return _chunks[chunkIndex][insideChunkPosition];
+            }
+        }
+
+        public override Encoding Encoding { get; }
+
+        public override string Filename { get; }
+
+        public override int Length => _length;
+
+        public override RazorSourceLineCollection Lines => _lines;
+
+        public override void CopyTo(int sourceIndex, char[] destination, int destinationIndex, int count)
+        {
+            if (destination == null)
+            {
+                throw new ArgumentNullException(nameof(destination));
+            }
+
+            if (sourceIndex < 0)
+            {
+                throw new ArgumentOutOfRangeException(nameof(sourceIndex));
+            }
+
+            if (destinationIndex < 0)
+            {
+                throw new ArgumentOutOfRangeException(nameof(destinationIndex));
+            }
+
+            if (count < 0 || count > Length - sourceIndex || count > destination.Length - destinationIndex)
+            {
+                throw new ArgumentOutOfRangeException(nameof(count));
+            }
+
+            if (count == 0)
+            {
+                return;
+            }
+
+            var chunkIndex = sourceIndex / _chunkMaxLength;
+            var insideChunkPosition = sourceIndex % _chunkMaxLength;
+            var remaining = count;
+            var currentDestIndex = destinationIndex;
+
+            while (remaining > 0)
+            {
+                var toCopy = Math.Min(remaining, _chunkMaxLength - insideChunkPosition);
+                Array.Copy(_chunks[chunkIndex], insideChunkPosition, destination, currentDestIndex, toCopy);
+
+                remaining -= toCopy;
+                currentDestIndex += toCopy;
+                chunkIndex++;
+                insideChunkPosition = 0;
+            }
+        }
+
+        private static void ReadChunks(StreamReader reader, int chunkMaxLength, out int length, out List<char[]> chunks)
+        {
+            length = 0;
+            chunks = new List<char[]>();
+
+            int read;
+            do
+            {
+                var chunk = new char[chunkMaxLength];
+                read = reader.ReadBlock(chunk, 0, chunkMaxLength);
+
+                length += read;
+
+                if (read > 0)
+                {
+                    chunks.Add(chunk);
+                }
+            }
+            while (read == chunkMaxLength);
+        }
+    }
+}
diff --git a/src/Microsoft.AspNetCore.Razor.Evolution/RazorSourceDocument.cs b/src/Microsoft.AspNetCore.Razor.Evolution/RazorSourceDocument.cs
index 9d7505b657..d1e1565937 100644
--- a/src/Microsoft.AspNetCore.Razor.Evolution/RazorSourceDocument.cs
+++ b/src/Microsoft.AspNetCore.Razor.Evolution/RazorSourceDocument.cs
@@ -9,6 +9,8 @@ namespace Microsoft.AspNetCore.Razor.Evolution
 {
     public abstract class RazorSourceDocument
     {
+        private const int LargeObjectHeapLimitInChars = 40 * 1024; // 40K Unicode chars is 80KB which is less than the large object heap limit.
+
         internal static readonly RazorSourceDocument[] EmptyArray = new RazorSourceDocument[0];
 
         public abstract Encoding Encoding { get; }
@@ -75,16 +77,18 @@ namespace Microsoft.AspNetCore.Razor.Evolution
 
             if (streamLength > 0)
             {
+                var bufferSize = Math.Min(streamLength, LargeObjectHeapLimitInChars);
+
                 var reader = new StreamReader(
                     stream,
                     contentEncoding,
                     detectEncodingFromByteOrderMarks: true,
-                    bufferSize: streamLength,
+                    bufferSize: bufferSize,
                     leaveOpen: true);
 
                 using (reader)
                 {
-                    content = reader.ReadToEnd();
+                    reader.Peek();      // Just to populate the encoding
 
                     if (encoding == null)
                     {
@@ -97,6 +101,18 @@ namespace Microsoft.AspNetCore.Razor.Evolution
                                 encoding.EncodingName,
                                 reader.CurrentEncoding.EncodingName));
                     }
+
+                    if (streamLength > LargeObjectHeapLimitInChars)
+                    {
+                        // If the resulting string would end up on the large object heap, then use LargeTextRazorSourceDocument.
+                        return new LargeTextRazorSourceDocument(
+                            reader,
+                            LargeObjectHeapLimitInChars,
+                            contentEncoding,
+                            filename);
+                    }
+
+                    content = reader.ReadToEnd();
                 }
             }
 
diff --git a/test/Microsoft.AspNetCore.Razor.Evolution.Test/LargeTextRazorSourceDocumentTest.cs b/test/Microsoft.AspNetCore.Razor.Evolution.Test/LargeTextRazorSourceDocumentTest.cs
new file mode 100644
index 0000000000..71778bf358
--- /dev/null
+++ b/test/Microsoft.AspNetCore.Razor.Evolution.Test/LargeTextRazorSourceDocumentTest.cs
@@ -0,0 +1,106 @@
+﻿using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Text;
+using Xunit;
+
+namespace Microsoft.AspNetCore.Razor.Evolution.Test
+{
+    public class LargeTextRazorSourceDocumentTest
+    {
+        private const int ChunkTestLength = 10;
+
+        [Theory]
+        [InlineData(ChunkTestLength - 1)]
+        [InlineData(ChunkTestLength)]
+        [InlineData(ChunkTestLength + 1)]
+        [InlineData(ChunkTestLength * 2 - 1)]
+        [InlineData(ChunkTestLength * 2)]
+        [InlineData(ChunkTestLength * 2 + 1)]
+        public void Indexer_ProvidesCharacterAccessToContent(int contentLength)
+        {
+            // Arrange
+            var content = new char[contentLength];
+
+            for (var i = 0; i < contentLength - 1; i++)
+            {
+                content[i] = 'a';
+            }
+            content[contentLength - 1] = 'b';
+            var contentString = new string(content);
+
+            var stream = TestRazorSourceDocument.CreateStreamContent(new string(content));
+            var reader = new StreamReader(stream, true);
+            var document = new LargeTextRazorSourceDocument(reader, ChunkTestLength, Encoding.UTF8, "file.cshtml");
+
+            // Act
+            var output = new char[contentLength];
+            for (var i = 0; i < document.Length; i++)
+            {
+                output[i] = document[i];
+            }
+            var outputString = new string(output);
+
+            // Assert
+            Assert.Equal(contentLength, document.Length);
+            Assert.Equal(contentString, outputString);
+        }
+
+        [Theory]
+        [InlineData("test.cshtml")]
+        [InlineData(null)]
+        public void Filename(string fileName)
+        {
+            // Arrange
+            var stream = TestRazorSourceDocument.CreateStreamContent("abc");
+            var reader = new StreamReader(stream, true);
+
+            // Act
+            var document = new LargeTextRazorSourceDocument(reader, ChunkTestLength, Encoding.UTF8, fileName);
+
+            // Assert
+            Assert.Equal(fileName, document.Filename);
+        }
+
+        [Fact]
+        public void Lines()
+        {
+            // Arrange
+            var stream = TestRazorSourceDocument.CreateStreamContent("abc\ndef\nghi");
+            var reader = new StreamReader(stream, true);
+
+            // Act
+            var document = new LargeTextRazorSourceDocument(reader, ChunkTestLength, Encoding.UTF8, "file.cshtml");
+
+            // Assert
+            Assert.Equal(3, document.Lines.Count);
+        }
+
+        [Theory]
+        [InlineData("", 0, 0, 0)]                                   // Nothing to copy
+        [InlineData("a", 0, 100, 1)]                                // Destination index different from start
+        [InlineData("j", ChunkTestLength - 1, 0, 1)]                // One char just before the chunk limit
+        [InlineData("k", ChunkTestLength, 0, 1)]                    // One char one the chunk limit
+        [InlineData("l", ChunkTestLength + 1, 0, 1)]                // One char just after the chunk limit
+        [InlineData("jk", ChunkTestLength - 1, 0, 2)]               // Two char that are on both chunk sides
+        [InlineData("abcdefghijklmnopqrstuvwxy", 0, 100, 25)]       // Everything except the last
+        [InlineData("abcdefghijklmnopqrstuvwxyz", 0, 0, 26)]        // Copy all
+        [InlineData("xyz", 23, 0, 3)]                               // The last chars
+        public void CopyTo(string expected, int sourceIndex, int destinationIndex, int count)
+        {
+            // Arrange
+            var stream = TestRazorSourceDocument.CreateStreamContent("abcdefghijklmnopqrstuvwxyz");
+
+            var reader = new StreamReader(stream, true);
+            var document = new LargeTextRazorSourceDocument(reader, ChunkTestLength, Encoding.UTF8, "file.cshtml");
+
+            // Act
+            var destination = new char[1000];
+            document.CopyTo(sourceIndex, destination, destinationIndex, count);
+
+            // Assert
+            var copy = new string(destination, destinationIndex, count);
+            Assert.Equal(expected, copy);
+        }
+    }
+}
diff --git a/test/Microsoft.AspNetCore.Razor.Evolution.Test/RazorSourceDocumentTest.cs b/test/Microsoft.AspNetCore.Razor.Evolution.Test/RazorSourceDocumentTest.cs
index 9a702a91da..fc80d816d0 100644
--- a/test/Microsoft.AspNetCore.Razor.Evolution.Test/RazorSourceDocumentTest.cs
+++ b/test/Microsoft.AspNetCore.Razor.Evolution.Test/RazorSourceDocumentTest.cs
@@ -94,5 +94,20 @@ namespace Microsoft.AspNetCore.Razor.Evolution
                 () => RazorSourceDocument.ReadFrom(content, "file.cshtml", Encoding.UTF8));
             Assert.Equal(expectedMessage, exception.Message);
         }
+
+        [Fact]
+        public void ReadFrom_LargeContent()
+        {
+            // Arrange
+            var content = TestRazorSourceDocument.CreateStreamContent(new string('a', 100000));
+
+            // Act
+            var document = RazorSourceDocument.ReadFrom(content, "file.cshtml");
+
+            // Assert
+            Assert.IsType<LargeTextRazorSourceDocument>(document);
+            Assert.Equal("file.cshtml", document.Filename);
+            Assert.Same(Encoding.UTF8, document.Encoding);
+        }
     }
 }