Refactored Html parser - part 1

(cherry picked from commit dotnet/aspnetcore-tooling@31f53d44b5)
Commit migrated from b154e53f06
This commit is contained in:
Ajay Bhargav Baaskaran 2018-11-20 17:16:07 -08:00 committed by Ryan Nowak
parent 6fa46903e2
commit 5bb16b1a14
8 changed files with 2408 additions and 1692 deletions

View File

@ -9,6 +9,7 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy
internal class DirectiveHtmlTokenizer : HtmlTokenizer
{
private bool _visitedFirstTokenStart = false;
private SourceLocation _firstTokenVisitLocation = SourceLocation.Undefined;
public DirectiveHtmlTokenizer(ITextDocument source) : base(source)
{
@ -16,10 +17,12 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy
protected override StateResult Dispatch()
{
var location = CurrentLocation;
var result = base.Dispatch();
if (result.Result != null && IsValidTokenType(result.Result.Kind))
{
_visitedFirstTokenStart = true;
_firstTokenVisitLocation = location;
}
return result;
@ -31,8 +34,10 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy
Debug.Assert(Buffer.Length == 0);
StartToken();
if (EndOfFile || _visitedFirstTokenStart)
if (EndOfFile || (_visitedFirstTokenStart && _firstTokenVisitLocation != CurrentLocation))
{
// We also need to make sure we are currently past the position where we found the first token.
// If the position is equal, that means the parser put the token back for later parsing.
return null;
}

View File

@ -49,6 +49,8 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy
public bool InTemplateContext { get; set; }
public bool StartOfLine { get; set; }
public AcceptedCharactersInternal LastAcceptedCharacters { get; set; } = AcceptedCharactersInternal.None;
public bool EndOfFile

View File

@ -100,5 +100,26 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy
{
return CharUnicodeInfo.GetUnicodeCategory(value) == UnicodeCategory.DecimalDigitNumber;
}
// From http://dev.w3.org/html5/spec/Overview.html#elements-0
// Names of the HTML elements treated as void elements (see the spec link above).
// Lookups ignore case because the set is built with StringComparer.OrdinalIgnoreCase,
// so "BR" and "br" both match.
// NOTE: readonly only protects the field reference; the HashSet itself is still mutable,
// so consumers should treat it as read-only and never Add/Remove entries.
public static readonly HashSet<string> VoidElements = new HashSet<string>(StringComparer.OrdinalIgnoreCase)
{
"area",
"base",
"br",
"col",
"command",
"embed",
"hr",
"img",
"input",
"keygen",
"link",
"meta",
"param",
"source",
"track",
"wbr"
};
}
}

View File

@ -0,0 +1,23 @@
// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
using System;
namespace Microsoft.AspNetCore.Razor.Language.Legacy
{
// Named states for the legacy Razor parser, listed in alphabetical order.
// NOTE(review): presumably each member labels the kind of construct the HTML parser is
// currently processing; the code that consumes this enum is not visible here — confirm.
// No explicit values are assigned, so members are numbered from 0 in declaration order
// (CData = 0 ... XmlPI = 11); inserting or reordering members changes those numbers.
internal enum ParserState
{
CData,
CodeTransition,
DoubleTransition,
EOF,
MarkupComment,
MarkupText,
Misc,
RazorComment,
SpecialTag,
Tag,
Unknown,
XmlPI,
}
}

View File

@ -243,6 +243,11 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy
return true;
}
// Convenience overload: builds a predicate that is true when a token's Kind equals
// any entry in `types`, then forwards to the predicate-based ReadWhile overload.
protected internal IEnumerable<SyntaxToken> ReadWhile(params SyntaxKind[] types)
{
    Func<SyntaxToken, bool> matchesAnyKind = candidate => types.Any(kind => kind == candidate.Kind);
    return ReadWhile(matchesAnyKind);
}
protected internal IEnumerable<SyntaxToken> ReadWhile(Func<SyntaxToken, bool> condition)
{
return ReadWhileLazy(condition).ToList();
@ -409,12 +414,7 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy
{
foreach (var token in tokens)
{
foreach (var error in token.GetDiagnostics())
{
Context.ErrorSink.OnError(error);
}
TokenBuilder.Add(token);
Accept(token);
}
}
@ -422,6 +422,15 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy
{
if (token != null)
{
if (token.Kind == SyntaxKind.NewLine)
{
Context.StartOfLine = true;
}
else if (token.Kind != SyntaxKind.Whitespace)
{
Context.StartOfLine = false;
}
foreach (var error in token.GetDiagnostics())
{
Context.ErrorSink.OnError(error);

View File

@ -11,27 +11,6 @@ namespace Microsoft.AspNetCore.Razor.Language.Syntax
{
internal static class SyntaxNodeExtensions
{
// From http://dev.w3.org/html5/spec/Overview.html#elements-0
private static readonly HashSet<string> VoidElements = new HashSet<string>(StringComparer.OrdinalIgnoreCase)
{
"area",
"base",
"br",
"col",
"command",
"embed",
"hr",
"img",
"input",
"keygen",
"link",
"meta",
"param",
"source",
"track",
"wbr"
};
public static TNode WithAnnotations<TNode>(this TNode node, params SyntaxAnnotation[] annotations) where TNode : SyntaxNode
{
if (node == null)
@ -333,7 +312,7 @@ namespace Microsoft.AspNetCore.Razor.Language.Syntax
throw new ArgumentNullException(nameof(tagBlock));
}
return VoidElements.Contains(tagBlock.GetTagName());
return ParserHelpers.VoidElements.Contains(tagBlock.GetTagName());
}
}
}