diff --git a/src/Razor/src/Microsoft.AspNetCore.Razor.Language/Legacy/DirectiveHtmlTokenizer.cs b/src/Razor/src/Microsoft.AspNetCore.Razor.Language/Legacy/DirectiveHtmlTokenizer.cs index 4921c8aa1e..7c69a1c3b8 100644 --- a/src/Razor/src/Microsoft.AspNetCore.Razor.Language/Legacy/DirectiveHtmlTokenizer.cs +++ b/src/Razor/src/Microsoft.AspNetCore.Razor.Language/Legacy/DirectiveHtmlTokenizer.cs @@ -9,6 +9,7 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy internal class DirectiveHtmlTokenizer : HtmlTokenizer { private bool _visitedFirstTokenStart = false; + private SourceLocation _firstTokenVisitLocation = SourceLocation.Undefined; public DirectiveHtmlTokenizer(ITextDocument source) : base(source) { @@ -16,10 +17,12 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy protected override StateResult Dispatch() { + var location = CurrentLocation; var result = base.Dispatch(); if (result.Result != null && IsValidTokenType(result.Result.Kind)) { _visitedFirstTokenStart = true; + _firstTokenVisitLocation = location; } return result; @@ -31,8 +34,10 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy Debug.Assert(Buffer.Length == 0); StartToken(); - if (EndOfFile || _visitedFirstTokenStart) + if (EndOfFile || (_visitedFirstTokenStart && _firstTokenVisitLocation != CurrentLocation)) { + // We also need to make sure we are currently past the position where we found the first token. + // If the position is equal, that means the parser put the token back for later parsing. return null; } diff --git a/src/Razor/src/Microsoft.AspNetCore.Razor.Language/Legacy/HtmlMarkupParser.Legacy.cs b/src/Razor/src/Microsoft.AspNetCore.Razor.Language/Legacy/HtmlMarkupParser.Legacy.cs new file mode 100644 index 0000000000..ac7b174c7f --- /dev/null +++ b/src/Razor/src/Microsoft.AspNetCore.Razor.Language/Legacy/HtmlMarkupParser.Legacy.cs @@ -0,0 +1,1745 @@ +// Copyright (c) .NET Foundation. All rights reserved. +// Licensed under the Apache License, Version 2.0. 
See License.txt in the project root for license information. + +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; +using Microsoft.AspNetCore.Razor.Language.Syntax.InternalSyntax; + +namespace Microsoft.AspNetCore.Razor.Language.Legacy +{ + internal partial class HtmlMarkupParser : TokenizerBackedParser + { + private SourceLocation _lastTagStart = SourceLocation.Zero; + private SyntaxToken _bufferedOpenAngle; + + /* Selects which comparison mode the Comparison property below returns. */ + private bool CaseSensitive { get; set; } + + /** <summary>String comparison mode derived from CaseSensitive: StringComparison.Ordinal when it is true, StringComparison.OrdinalIgnoreCase otherwise.</summary> */ + private StringComparison Comparison + { + get { return CaseSensitive ? StringComparison.Ordinal : StringComparison.OrdinalIgnoreCase; } + } + + /* NOTE(review): the comment that follows breaks off after its opening quote and resumes mid-sentence inside a different comment. The text in between, including the signature of the method whose body follows, is not present here; confirm against the original file before relying on this excerpt. */ + // Special tags include " As we will be treating this as a comment ending, there is no need to handle this case at all. + // 2.2.3 "--!>" + // 2.3 nor end with the string "" + + if (CurrentToken.Kind != SyntaxKind.DoubleHyphen) + { + return false; + } + + // Check condition 2.1 + if (NextIs(SyntaxKind.CloseAngle) || NextIs(next => IsHyphen(next) && NextIs(SyntaxKind.CloseAngle))) + { + return false; + } + + // Check condition 2.2 + var isValidComment = false; + LookaheadUntil((token, prevTokens) => + { + if (token.Kind == SyntaxKind.DoubleHyphen) + { + if (NextIs(SyntaxKind.CloseAngle)) + { + // Check condition 2.3: We're at the end of a comment. Check to make sure the text ending is allowed. + isValidComment = !IsCommentContentEndingInvalid(prevTokens); + return true; + } + else if (NextIs(ns => IsHyphen(ns) && NextIs(SyntaxKind.CloseAngle))) + { + // Check condition 2.3: we're at the end of a comment, which has an extra dash. + // Need to treat the dash as part of the content and check the ending. 
+ // However, that case would have already been checked as part of check from 2.2.1 which + // would already fail this iteration and we wouldn't get here + isValidComment = true; + return true; + } + else if (NextIs(ns => ns.Kind == SyntaxKind.Bang && NextIs(SyntaxKind.CloseAngle))) + { + // This is condition 2.2.3 + isValidComment = false; + return true; + } + } + else if (token.Kind == SyntaxKind.OpenAngle) + { + // Checking condition 2.2.1 + if (NextIs(ns => ns.Kind == SyntaxKind.Bang && NextIs(SyntaxKind.DoubleHyphen))) + { + isValidComment = false; + return true; + } + } + + return false; + }); + + return isValidComment; + } + + /** <summary>Attempts to parse a CDATA section. Proceeds only when the current token is a Text token whose content equals "cdata" (ordinal, ignoring case) and the token after it is a LeftBracket; in that case the kinds RightBracket, RightBracket, CloseAngle are passed to LegacyAcceptTokenUntilAll.</summary> <returns>The result of LegacyAcceptTokenUntilAll when the prefix matches; otherwise false.</returns> */ + private bool TryParseCData(in SyntaxListBuilder builder) + { + if (CurrentToken.Kind == SyntaxKind.Text && string.Equals(CurrentToken.Content, "cdata", StringComparison.OrdinalIgnoreCase)) + { + /* The "cdata" token is accepted here, before the bracket check, so it stays accepted even when this method goes on to return false. */ + if (AcceptAndMoveNext()) + { + if (CurrentToken.Kind == SyntaxKind.LeftBracket) + { + return LegacyAcceptTokenUntilAll(builder, SyntaxKind.RightBracket, SyntaxKind.RightBracket, SyntaxKind.CloseAngle); + } + } + } + + return false; + } + + /** <summary>Parses the remainder of an XML processing instruction. Asserts that the current token is a QuestionMark, accepts it, then passes the kinds QuestionMark, CloseAngle to LegacyAcceptTokenUntilAll.</summary> <returns>The result of LegacyAcceptTokenUntilAll.</returns> */ + private bool TryParseXmlPI(in SyntaxListBuilder builder) + { + // Accept "?" + Assert(SyntaxKind.QuestionMark); + /* The return value of AcceptAndMoveNext is not checked here, unlike in TryParseCData. */ + AcceptAndMoveNext(); + return LegacyAcceptTokenUntilAll(builder, SyntaxKind.QuestionMark, SyntaxKind.CloseAngle); + } + + private void SkipToEndScriptAndParseCode(in SyntaxListBuilder builder, AcceptedCharactersInternal endTagAcceptedCharacters = AcceptedCharactersInternal.Any) + { + // Special case for