Refactored Html parser - part 1

This commit is contained in:
Ajay Bhargav Baaskaran 2018-11-20 17:16:07 -08:00
parent 46242b307c
commit 31f53d44b5
10 changed files with 2422 additions and 1704 deletions

View File

@ -9,6 +9,7 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy
internal class DirectiveHtmlTokenizer : HtmlTokenizer
{
private bool _visitedFirstTokenStart = false;
private SourceLocation _firstTokenVisitLocation = SourceLocation.Undefined;
public DirectiveHtmlTokenizer(ITextDocument source) : base(source)
{
@ -16,10 +17,12 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy
protected override StateResult Dispatch()
{
var location = CurrentLocation;
var result = base.Dispatch();
if (result.Result != null && IsValidTokenType(result.Result.Kind))
{
_visitedFirstTokenStart = true;
_firstTokenVisitLocation = location;
}
return result;
@ -31,8 +34,10 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy
Debug.Assert(Buffer.Length == 0);
StartToken();
if (EndOfFile || _visitedFirstTokenStart)
if (EndOfFile || (_visitedFirstTokenStart && _firstTokenVisitLocation != CurrentLocation))
{
// We also need to make sure we are currently past the position where we found the first token.
// If the position is equal, that means the parser put the token back for later parsing.
return null;
}

View File

@ -49,6 +49,8 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy
public bool InTemplateContext { get; set; }
public bool StartOfLine { get; set; }
public AcceptedCharactersInternal LastAcceptedCharacters { get; set; } = AcceptedCharactersInternal.None;
public bool EndOfFile

View File

@ -100,5 +100,26 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy
{
return CharUnicodeInfo.GetUnicodeCategory(value) == UnicodeCategory.DecimalDigitNumber;
}
// Tag names of the HTML void elements, taken from
// http://dev.w3.org/html5/spec/Overview.html#elements-0
// The set is built with StringComparer.OrdinalIgnoreCase, so Contains() matches a tag name
// regardless of letter case.
// NOTE(review): the field is readonly, but the HashSet it points to is still mutable and is
// publicly reachable; presumably no caller adds or removes entries - confirm.
public static readonly HashSet<string> VoidElements = new HashSet<string>(StringComparer.OrdinalIgnoreCase)
{
"area",
"base",
"br",
"col",
"command",
"embed",
"hr",
"img",
"input",
"keygen",
"link",
"meta",
"param",
"source",
"track",
"wbr"
};
}
}

View File

@ -0,0 +1,23 @@
// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
using System;
namespace Microsoft.AspNetCore.Razor.Language.Legacy
{
/// <summary>
/// Named states for the parser in this namespace.
/// </summary>
/// <remarks>
/// Members are listed alphabetically and carry no explicit values, so each member's numeric
/// value is simply its position in this list (CData == 0). Inserting a member anywhere but the
/// end renumbers every member after it; avoid persisting or comparing the raw integers.
/// NOTE(review): the code that consumes these states is not in this file; presumably each
/// member names the kind of markup construct the HTML parser handles next - confirm against usage.
/// </remarks>
internal enum ParserState
{
CData,
CodeTransition,
DoubleTransition,
EOF,
MarkupComment,
MarkupText,
Misc,
RazorComment,
SpecialTag,
Tag,
Unknown,
XmlPI,
}
}

View File

@ -243,6 +243,11 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy
return true;
}
/// <summary>
/// Convenience overload: forwards to the predicate-based <c>ReadWhile</c> with a predicate that
/// accepts a token when its <c>Kind</c> equals any entry in <paramref name="types"/>.
/// </summary>
/// <param name="types">The token kinds the predicate accepts; an empty list accepts nothing.</param>
/// <returns>The result of the predicate-based <c>ReadWhile</c> overload.</returns>
protected internal IEnumerable<SyntaxToken> ReadWhile(params SyntaxKind[] types)
{
    // Name the kind check so the forwarding call reads as a single step.
    Func<SyntaxToken, bool> hasAcceptedKind =
        candidate => types.Any(accepted => accepted == candidate.Kind);

    return ReadWhile(hasAcceptedKind);
}
protected internal IEnumerable<SyntaxToken> ReadWhile(Func<SyntaxToken, bool> condition)
{
return ReadWhileLazy(condition).ToList();
@ -409,12 +414,7 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy
{
foreach (var token in tokens)
{
foreach (var error in token.GetDiagnostics())
{
Context.ErrorSink.OnError(error);
}
TokenBuilder.Add(token);
Accept(token);
}
}
@ -422,6 +422,15 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy
{
if (token != null)
{
if (token.Kind == SyntaxKind.NewLine)
{
Context.StartOfLine = true;
}
else if (token.Kind != SyntaxKind.Whitespace)
{
Context.StartOfLine = false;
}
foreach (var error in token.GetDiagnostics())
{
Context.ErrorSink.OnError(error);

View File

@ -11,27 +11,6 @@ namespace Microsoft.AspNetCore.Razor.Language.Syntax
{
internal static class SyntaxNodeExtensions
{
// Tag names of the HTML void elements, taken from
// http://dev.w3.org/html5/spec/Overview.html#elements-0
// The set is built with StringComparer.OrdinalIgnoreCase, so Contains() matches a tag name
// regardless of letter case.
private static readonly HashSet<string> VoidElements = new HashSet<string>(StringComparer.OrdinalIgnoreCase)
{
"area",
"base",
"br",
"col",
"command",
"embed",
"hr",
"img",
"input",
"keygen",
"link",
"meta",
"param",
"source",
"track",
"wbr"
};
public static TNode WithAnnotations<TNode>(this TNode node, params SyntaxAnnotation[] annotations) where TNode : SyntaxNode
{
if (node == null)
@ -333,7 +312,7 @@ namespace Microsoft.AspNetCore.Razor.Language.Syntax
throw new ArgumentNullException(nameof(tagBlock));
}
return VoidElements.Contains(tagBlock.GetTagName());
return ParserHelpers.VoidElements.Contains(tagBlock.GetTagName());
}
}
}

View File

@ -84,7 +84,7 @@ namespace Microsoft.AspNetCore.Razor.Language.Test.Legacy
var sut = CreateTestParserForContent("---->");
// Act & Assert
Assert.True(sut.IsHtmlCommentAhead());
Assert.True(sut.LegacyIsHtmlCommentAhead());
}
[Fact]
@ -94,7 +94,7 @@ namespace Microsoft.AspNetCore.Razor.Language.Test.Legacy
var sut = CreateTestParserForContent("-- Some comment content in here -->");
// Act & Assert
Assert.True(sut.IsHtmlCommentAhead());
Assert.True(sut.LegacyIsHtmlCommentAhead());
}
[Fact]
@ -104,7 +104,7 @@ namespace Microsoft.AspNetCore.Razor.Language.Test.Legacy
var sut = CreateTestParserForContent("-- Some comment content in here ----->");
// Act & Assert
Assert.True(sut.IsHtmlCommentAhead());
Assert.True(sut.LegacyIsHtmlCommentAhead());
}
[Fact]
@ -114,7 +114,7 @@ namespace Microsoft.AspNetCore.Razor.Language.Test.Legacy
var sut = CreateTestParserForContent("-- Some comment content in here <!--->");
// Act & Assert
Assert.False(sut.IsHtmlCommentAhead());
Assert.False(sut.LegacyIsHtmlCommentAhead());
}
[Fact]
@ -124,7 +124,7 @@ namespace Microsoft.AspNetCore.Razor.Language.Test.Legacy
var sut = CreateTestParserForContent("-- comment --> the first part is a valid comment without the Open angle and bang tokens");
// Act & Assert
Assert.True(sut.IsHtmlCommentAhead());
Assert.True(sut.LegacyIsHtmlCommentAhead());
}
[Fact]
@ -134,7 +134,7 @@ namespace Microsoft.AspNetCore.Razor.Language.Test.Legacy
var sut = CreateTestParserForContent("-- not closed comment");
// Act & Assert
Assert.False(sut.IsHtmlCommentAhead());
Assert.False(sut.LegacyIsHtmlCommentAhead());
}
[Fact]
@ -144,7 +144,7 @@ namespace Microsoft.AspNetCore.Razor.Language.Test.Legacy
var sut = CreateTestParserForContent("-- not closed comment--");
// Act & Assert
Assert.False(sut.IsHtmlCommentAhead());
Assert.False(sut.LegacyIsHtmlCommentAhead());
}
[Fact]
@ -154,7 +154,7 @@ namespace Microsoft.AspNetCore.Razor.Language.Test.Legacy
var sut = CreateTestParserForContent("-- not closed @DateTime.Now comment-->");
// Act & Assert
Assert.True(sut.IsHtmlCommentAhead());
Assert.True(sut.LegacyIsHtmlCommentAhead());
}
[Fact]
@ -202,9 +202,9 @@ namespace Microsoft.AspNetCore.Razor.Language.Test.Legacy
get => base.PreviousToken;
}
public new bool IsHtmlCommentAhead()
public new bool LegacyIsHtmlCommentAhead()
{
return base.IsHtmlCommentAhead();
return base.LegacyIsHtmlCommentAhead();
}
public TestHtmlMarkupParser(ParserContext context) : base(context)

View File

@ -34,8 +34,10 @@ Document -
IntermediateToken - (804:26,9 [10] CSharp7.cshtml) - Html - \n
CSharpExpression - (816:27,10 [34] CSharp7.cshtml)
IntermediateToken - (816:27,10 [34] CSharp7.cshtml) - CSharp - (First: "John", Last: "Doe").First
HtmlContent - (872:28,0 [18] CSharp7.cshtml)
IntermediateToken - (872:28,0 [4] CSharp7.cshtml) - Html -
HtmlContent - (851:27,45 [1] CSharp7.cshtml)
IntermediateToken - (851:27,45 [1] CSharp7.cshtml) - Html -
HtmlContent - (870:27,64 [20] CSharp7.cshtml)
IntermediateToken - (870:27,64 [6] CSharp7.cshtml) - Html - \n
IntermediateToken - (876:28,4 [6] CSharp7.cshtml) - Html - </div>
IntermediateToken - (882:28,10 [8] CSharp7.cshtml) - Html - \n\n
CSharpCode - (891:30,5 [291] CSharp7.cshtml)