// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
using AngleSharp;
using AngleSharp.Extensions;
using AngleSharp.Html;
using AngleSharp.Parser.Html;
using Microsoft.AspNetCore.Razor.Language;
using Microsoft.AspNetCore.Razor.Language.Intermediate;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Text;
namespace Microsoft.AspNetCore.Blazor.Razor
{
// Rewrites the standard IR to a format more suitable for Blazor.
//
// HTML nodes are rewritten to contain more structure: instead of treating HTML as opaque content,
// it is structured into element/component nodes and attribute nodes.
internal class ComponentDocumentRewritePass : IntermediateNodePassBase, IRazorDocumentClassifierPass
{
// Per the HTML spec, the following elements are inherently self-closing.
// For example, <br> is the same as <br /> (and therefore it cannot contain descendants).
// The set uses an OrdinalIgnoreCase comparer, so lookups ignore the casing of the tag name.
private static readonly HashSet<string> VoidElements = new HashSet<string>(StringComparer.OrdinalIgnoreCase)
{
    "area", "base", "br", "col", "embed", "hr", "img", "input", "link", "meta", "param", "source", "track", "wbr",
};
// Scheduled one slot after ComponentDocumentClassifierPass.DefaultFeatureOrder so that this
// pass runs as soon as possible after the Component document classifier.
public override int Order
{
    get { return ComponentDocumentClassifierPass.DefaultFeatureOrder + 1; }
}
// Walks and rewrites the document, but only when its DocumentKind equals
// ComponentDocumentClassifierPass.ComponentDocumentKind; any other document is left untouched.
protected override void ExecuteCore(RazorCodeDocument codeDocument, DocumentIntermediateNode documentNode)
{
    if (documentNode.DocumentKind == ComponentDocumentClassifierPass.ComponentDocumentKind)
    {
        var walker = new RewriteWalker(codeDocument.Source);
        walker.Visit(documentNode);
    }
}
// Visits nodes then rewrites them using a post-order traversal. The result is that the tree
// is rewritten bottom up.
//
// This relies on a few invariants Razor already provides for correctness.
// - Tag Helpers are the only real nesting construct
// - Tag Helpers require properly nested HTML inside their body
//
// This means that when we find a 'container' for HTML content, we have the guarantee
// that the content is properly nested, except at the top level of scope. And since the top
// level isn't nested inside anything, we can't introduce any errors due to misunderstanding
// the structure.
private class RewriteWalker : IntermediateNodeWalker
{
// Source document handed to RewriteChildren whenever a node's children get rewritten.
private readonly RazorSourceDocument _source;

public RewriteWalker(RazorSourceDocument source) => _source = source;
// Visits every child first (so descendants are rewritten before their parent), then rewrites
// this node's own children if at least one of them is an HtmlContentIntermediateNode.
public override void VisitDefault(IntermediateNode node)
{
    var sawHtmlContent = false;
    for (var index = 0; index < node.Children.Count; index++)
    {
        var current = node.Children[index];
        Visit(current);
        sawHtmlContent |= current is HtmlContentIntermediateNode;
    }

    if (!sawHtmlContent)
    {
        // Nothing to restructure at this level.
        return;
    }

    RewriteChildren(_source, node);
}
// The three overrides below are intentionally empty: because they do nothing, this walker
// never descends into the node passed in, so RewriteChildren is never reached through an
// HTML attribute, a tag helper HTML attribute, or a tag helper property.

// Stops the walk at plain HTML attributes.
public override void VisitHtmlAttribute(HtmlAttributeIntermediateNode node)
{
// Don't rewrite inside of attributes
}
// Stops the walk at HTML attributes that belong to a tag helper.
public override void VisitTagHelperHtmlAttribute(TagHelperHtmlAttributeIntermediateNode node)
{
// Don't rewrite inside of attributes
}
// Stops the walk at tag helper properties.
public override void VisitTagHelperProperty(TagHelperPropertyIntermediateNode node)
{
// Don't rewrite inside of attributes
}
// Rebuilds the child collection of 'node' in place: HTML content children are tokenized and
// turned into a tree of HtmlElementIntermediateNode / HtmlAttributeIntermediateNode /
// HtmlContentIntermediateNode, while every other child is re-attached under whichever element
// is open at that point.
//
// source: document the HtmlParser uses to map parsed text back to source locations.
// node:   the node whose children are cleared and rebuilt.
//
// Markup problems (unexpected, mismatched or unclosed tags, trailing content that could not be
// tokenized) are reported as diagnostics on the affected nodes. A token type other than
// character, start tag, end tag or comment throws InvalidCastException.
private void RewriteChildren(RazorSourceDocument source, IntermediateNode node)
{
    // We expect all of the immediate children of a node (together) to comprise
    // a well-formed tree of elements and components.
    //
    // The bottom of the stack is always 'node' itself; everything above it is an element
    // that has been opened but not yet closed.
    var stack = new Stack<IntermediateNode>();
    stack.Push(node);

    // Make a copy, we will clear and rebuild the child collection of this node.
    var children = node.Children.ToArray();
    node.Children.Clear();

    // Due to the way Anglesharp parses HTML (tags at a time) we need to keep track of some state.
    // A single tag can be spread across several children when one of its attributes contains C#,
    // for example (illustrative):
    //
    //      <foo bar="17" baz="@baz" />
    //
    // arrives as an HtmlContent child holding the beginning of the tag, an HtmlAttribute child
    // for 'baz', and another HtmlContent child holding the rest of the tag.
    //
    // The parser keeps the text of an unfinished tag and prepends it to the next HtmlContent that
    // is pushed, so we keep consuming HTML until the start tag is complete. Attribute nodes seen
    // in the meantime are buffered in 'attributes' and attached to the element once its start tag
    // has been parsed, after the attributes that came from the parsed HTML.
    var parser = new HtmlParser(source);
    var attributes = new List<HtmlAttributeIntermediateNode>();

    for (var i = 0; i < children.Length; i++)
    {
        if (children[i] is HtmlContentIntermediateNode htmlNode)
        {
            parser.Push(htmlNode);
            var tokens = parser.Get();

            foreach (var token in tokens)
            {
                // We have to call this before get. Anglesharp doesn't return the start position
                // of tokens.
                var start = parser.GetCurrentLocation();

                // We have to set the Location explicitly otherwise we would need to include
                // the token in every call to the parser.
                parser.SetLocation(token);

                var end = parser.GetCurrentLocation();

                if (token.Type == HtmlTokenType.EndOfFile)
                {
                    break;
                }

                switch (token.Type)
                {
                    case HtmlTokenType.Character:
                        {
                            // Text content
                            var span = new SourceSpan(start, end.AbsoluteIndex - start.AbsoluteIndex);
                            stack.Peek().Children.Add(new HtmlContentIntermediateNode()
                            {
                                Children =
                                {
                                    new IntermediateToken()
                                    {
                                        Content = token.Data,
                                        Kind = TokenKind.Html,
                                        Source = span,
                                    }
                                },
                                Source = span,
                            });
                            break;
                        }

                    case HtmlTokenType.StartTag:
                    case HtmlTokenType.EndTag:
                        {
                            var tag = token.AsTag();

                            if (token.Type == HtmlTokenType.StartTag)
                            {
                                var elementNode = new HtmlElementIntermediateNode()
                                {
                                    TagName = parser.GetTagNameOriginalCasing(tag),
                                    Source = new SourceSpan(start, end.AbsoluteIndex - start.AbsoluteIndex),
                                };

                                stack.Peek().Children.Add(elementNode);
                                stack.Push(elementNode);

                                for (var j = 0; j < tag.Attributes.Count; j++)
                                {
                                    // Unfortunately Anglesharp doesn't provide positions for attributes
                                    // so we can't record the spans here.
                                    var attribute = tag.Attributes[j];
                                    elementNode.Children.Add(CreateAttributeNode(attribute));
                                }

                                // Attach the attribute nodes that were buffered while this tag was incomplete.
                                for (var j = 0; j < attributes.Count; j++)
                                {
                                    elementNode.Children.Add(attributes[j]);
                                }
                                attributes.Clear();

                                // A self-closing or void element can't have descendants, so close it right away.
                                // This must only happen for start tags: doing it for an end tag (e.g. a stray
                                // '</br>') would pop an element that this token never opened, and could empty
                                // the stack before the end tag handling below pops again.
                                if (tag.IsSelfClosing || VoidElements.Contains(tag.Data))
                                {
                                    // We can't possibly hit an error here since we just added an element node.
                                    stack.Pop();
                                }
                            }

                            if (token.Type == HtmlTokenType.EndTag)
                            {
                                var popped = stack.Pop();
                                if (stack.Count == 0)
                                {
                                    // If we managed to 'bottom out' the stack then we have an unbalanced end tag.
                                    // Put back the current node so we don't crash.
                                    stack.Push(popped);

                                    var tagName = parser.GetTagNameOriginalCasing(tag);
                                    var span = new SourceSpan(start, end.AbsoluteIndex - start.AbsoluteIndex);
                                    var diagnostic = BlazorDiagnosticFactory.Create_UnexpectedClosingTag(span, tagName);
                                    popped.Children.Add(new HtmlElementIntermediateNode()
                                    {
                                        Diagnostics =
                                        {
                                            diagnostic,
                                        },
                                        TagName = tagName,
                                        Source = span,
                                    });
                                }
                                else if (!string.Equals(tag.Name, ((HtmlElementIntermediateNode)popped).TagName, StringComparison.OrdinalIgnoreCase))
                                {
                                    // The end tag closes an element with a different name; report it on that element.
                                    var span = new SourceSpan(start, end.AbsoluteIndex - start.AbsoluteIndex);
                                    var diagnostic = BlazorDiagnosticFactory.Create_MismatchedClosingTag(span, ((HtmlElementIntermediateNode)popped).TagName, token.Data);
                                    popped.Diagnostics.Add(diagnostic);
                                }
                                else
                                {
                                    // Happy path.
                                    //
                                    // We need to compute a new source span because when we found the start tag
                                    // we did not yet know the end position of the element.
                                    var length = end.AbsoluteIndex - popped.Source.Value.AbsoluteIndex;
                                    popped.Source = new SourceSpan(
                                        popped.Source.Value.FilePath,
                                        popped.Source.Value.AbsoluteIndex,
                                        popped.Source.Value.LineIndex,
                                        popped.Source.Value.CharacterIndex,
                                        length);
                                }
                            }

                            break;
                        }

                    case HtmlTokenType.Comment:
                        // HTML comments produce no node.
                        break;

                    default:
                        throw new InvalidCastException($"Unsupported token type: {token.Type.ToString()}");
                }
            }
        }
        else if (children[i] is HtmlAttributeIntermediateNode htmlAttribute)
        {
            // Buffer the attribute for now, it will get written out as part of a tag.
            attributes.Add(htmlAttribute);
        }
        else
        {
            // not HTML, or already rewritten.
            stack.Peek().Children.Add(children[i]);
        }
    }

    var extraContent = parser.GetUnparsedContent();
    if (!string.IsNullOrEmpty(extraContent))
    {
        // extra HTML - almost certainly invalid because it couldn't be parsed.
        var start = parser.GetCurrentLocation();
        var end = parser.GetCurrentLocation(extraContent.Length);
        var span = new SourceSpan(start, end.AbsoluteIndex - start.AbsoluteIndex);
        stack.Peek().Children.Add(new HtmlContentIntermediateNode()
        {
            Children =
            {
                new IntermediateToken()
                {
                    Content = extraContent,
                    Kind = TokenKind.Html,
                    Source = span,
                }
            },
            Diagnostics =
            {
                BlazorDiagnosticFactory.Create_InvalidHtmlContent(span, extraContent),
            },
            Source = span,
        });
    }

    while (stack.Count > 1)
    {
        // not balanced: every element still open at this point never saw its end tag.
        var popped = (HtmlElementIntermediateNode)stack.Pop();
        var diagnostic = BlazorDiagnosticFactory.Create_UnclosedTag(popped.Source, popped.TagName);
        popped.Diagnostics.Add(diagnostic);
    }
}
}
// Builds an attribute node from a parsed name/value pair.
//
// attribute: Key becomes the AttributeName; Value becomes the content of a single HTML token
//            wrapped in an HtmlAttributeValueIntermediateNode.
//
// No Source span is set on any of the created nodes.
private static HtmlAttributeIntermediateNode CreateAttributeNode(KeyValuePair<string, string> attribute)
{
    return new HtmlAttributeIntermediateNode()
    {
        AttributeName = attribute.Key,
        Children =
        {
            new HtmlAttributeValueIntermediateNode()
            {
                Children =
                {
                    new IntermediateToken()
                    {
                        Kind = TokenKind.Html,
                        Content = attribute.Value,
                    },
                }
            },
        }
    };
}
// Concatenates, in order, the Content of every direct child of 'node' that is an
// IntermediateToken flagged as HTML. Children of any other kind are skipped.
private static string GetHtmlContent(HtmlContentIntermediateNode node)
{
    var htmlFragments = node.Children
        .OfType<IntermediateToken>()
        .Where(candidate => candidate.IsHtml)
        .Select(candidate => candidate.Content);

    return string.Concat(htmlFragments);
}
[DebuggerDisplay("{DebuggerDisplay,nq}")]
private class HtmlParser
{
// Document used to turn absolute indexes into line/column locations.
private readonly RazorSourceDocument _source;

// Tracks the offsets between the start of _content and the original source document.
// Each entry pairs an index into _content with the matching absolute index in the source.
private List<(int offset, int sourceOffset)> _offsets;

// Tokenizer input created from _content on every Push.
private TextSource _textSource;

// Index into _content up to which tokens have been consumed.
private int _position;

// Text currently being tokenized.
private string _content;

public HtmlParser(RazorSourceDocument source) => _source = source;
// Replaces the parser's buffer with: whatever was left unconsumed from the previous buffer,
// followed by the content of every HTML token that is a direct child of 'node'.
// Resets the read position to the start of the new buffer.
//
// node: HTML content whose tokens are appended. Each HTML token is expected to carry a Source
//       span, since its AbsoluteIndex is what maps the text back to the source document.
public void Push(HtmlContentIntermediateNode node)
{
    var builder = new StringBuilder();
    var offsets = new List<(int offset, int sourceOffset)>();

    if (_content != null && _position < _content.Length)
    {
        // Carry over the unconsumed tail of the previous buffer.
        //
        // The tail does not necessarily live in the first segment of the old buffer (the old
        // buffer may itself consist of carried-over text plus tokens from elsewhere in the
        // source), so resolve its start through the offset table rather than assuming it is
        // contiguous with _offsets[0].
        offsets.Add((0, GetAbsoluteIndex(_position)));

        // Keep every segment boundary that falls inside the tail, shifted to the new origin.
        for (var i = 0; i < _offsets.Count; i++)
        {
            if (_offsets[i].offset > _position)
            {
                offsets.Add((_offsets[i].offset - _position, _offsets[i].sourceOffset));
            }
        }

        builder.Append(_content, _position, _content.Length - _position);
    }

    for (var i = 0; i < node.Children.Count; i++)
    {
        if (node.Children[i] is IntermediateToken token && token.IsHtml)
        {
            // Record where this token's text starts in the buffer and in the source.
            offsets.Add((builder.Length, token.Source.Value.AbsoluteIndex));
            builder.Append(token.Content);
        }
    }

    _content = builder.ToString();
    _offsets = offsets;
    _textSource = new TextSource(_content);
    _position = 0;
}
// Returns the part of the buffer that has not been consumed yet, or an empty string when
// everything has been consumed or nothing has been pushed so far.
public string GetUnparsedContent()
{
    // _content stays null until the first Push; there is nothing unparsed in that case.
    if (_content == null || _position >= _content.Length)
    {
        return string.Empty;
    }

    return _content.Substring(_position);
}
// Tokenizes the current buffer, using HtmlEntityService.Resolver as the entity resolver.
//
// Throws InvalidOperationException when called before any content has been pushed.
public IEnumerable<HtmlToken> Get()
{
    if (_textSource == null)
    {
        throw new InvalidOperationException("You need to call Push first.");
    }

    return _textSource.Tokenize(HtmlEntityService.Resolver);
}
// Moves the read position to the tokenizer's current index, unless 'token' is the
// end-of-file token, in which case the position is left where it was.
public void SetLocation(HtmlToken token)
{
    if (token.Type == HtmlTokenType.EndOfFile)
    {
        // The tokenizer will advance to the end when you have an unclosed tag.
        // We don't want this, we want to resume before the unclosed tag.
        return;
    }

    _position = _textSource.Index;
}
// Returns the source location of the current read position, shifted by 'offset' characters
// of buffer content.
//
// The line and column are found by subtracting line lengths from the absolute index until
// the index falls inside a line. If no line contains it, both are reported as -1.
public SourceLocation GetCurrentLocation(int offset = 0)
{
    var absoluteIndex = GetAbsoluteIndex(_position + offset);

    var foundLine = -1;
    var foundColumn = -1;

    var charsLeft = absoluteIndex;
    var line = 0;
    while (line < _source.Lines.Count)
    {
        var length = _source.Lines.GetLineLength(line);
        if (charsLeft < length)
        {
            foundLine = line;
            foundColumn = charsLeft;
            break;
        }

        charsLeft -= length;
        line++;
    }

    return new SourceLocation(_source.FilePath, absoluteIndex, foundLine, foundColumn);
}
// Returns a span that starts at the token's reported position (mapped back to the source
// document) and ends at the current read position.
//
// The line and column are found by subtracting line lengths from the absolute index until
// the index falls inside a line. If no line contains it, both are reported as -1.
public SourceSpan GetSpan(HtmlToken token)
{
    var absoluteIndex = GetAbsoluteIndex(token.Position.Position);

    var foundLine = -1;
    var foundColumn = -1;

    var charsLeft = absoluteIndex;
    var line = 0;
    while (line < _source.Lines.Count)
    {
        var lineLength = _source.Lines.GetLineLength(line);
        if (charsLeft < lineLength)
        {
            foundLine = line;
            foundColumn = charsLeft;
            break;
        }

        charsLeft -= lineLength;
        line++;
    }

    var spanLength = GetAbsoluteIndex(_position) - absoluteIndex;
    return new SourceSpan(_source.FilePath, absoluteIndex, foundLine, foundColumn, spanLength);
}
// Maps an index into the buffer to an absolute index in the source document, using the last
// offset entry that starts at or before 'contentIndex'.
//
// Throws InvalidOperationException when no entry starts at or before 'contentIndex'.
private int GetAbsoluteIndex(int contentIndex)
{
    var segment = _offsets.FindLastIndex(entry => entry.offset <= contentIndex);
    if (segment < 0)
    {
        throw new InvalidOperationException("Unexpected index value.");
    }

    var (segmentStart, sourceStart) = _offsets[segment];
    return sourceStart + (contentIndex - segmentStart);
}
// Anglesharp canonicalizes the case of tags, we want what the user typed.
//
// Reads the name back out of the buffer at the token's position. If the text found there does
// not match the token's name (ignoring case), or the position falls outside the buffer, the
// canonicalized tag.Name is returned instead.
public string GetTagNameOriginalCasing(HtmlTagToken tag)
{
    var name = tag.Name;
    if (_content == null || string.IsNullOrEmpty(name))
    {
        return name;
    }

    // NOTE(review): assumes the token position is the buffer index just past the '<' —
    // the comparison below guards against that assumption being wrong.
    var offset = tag.Type == HtmlTokenType.EndTag ? 1 : 0; // For end tags, skip the '/'
    var start = tag.Position.Position + offset;

    var inBounds = start >= 0 && start + name.Length <= _content.Length;
    if (inBounds && string.Compare(_content, start, name, 0, name.Length, StringComparison.OrdinalIgnoreCase) == 0)
    {
        return _content.Substring(start, name.Length);
    }

    return name;
}
// Debugger-only view of the buffer: the consumed text, a '|' marking the read position,
// then the remaining text, all wrapped in "Content={...}".
private string DebuggerDisplay
{
    get
    {
        if (_content == null)
        {
            return "Content={}";
        }

        // Clamp so a position past the end still yields a valid split point.
        var split = Math.Min(_position, _content.Length);
        var consumed = _content.Substring(0, split);
        var remaining = _content.Substring(split);

        return string.Concat("Content=", "{", consumed, "|", remaining, "}");
    }
}
}
}
}