diff --git a/src/Microsoft.AspNetCore.Razor.Language/Legacy/HtmlMarkupParser.cs b/src/Microsoft.AspNetCore.Razor.Language/Legacy/HtmlMarkupParser.cs index fc675b6e47..2fe84b5a33 100644 --- a/src/Microsoft.AspNetCore.Razor.Language/Legacy/HtmlMarkupParser.cs +++ b/src/Microsoft.AspNetCore.Razor.Language/Legacy/HtmlMarkupParser.cs @@ -492,7 +492,7 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy if (AcceptAndMoveNext()) { - if (CurrentSymbol.Type == HtmlSymbolType.DoubleHyphen) + if (IsHtmlCommentAhead()) { using (Context.Builder.StartBlock(BlockKindInternal.HtmlComment)) { @@ -505,32 +505,7 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy SkipToAndParseCode(HtmlSymbolType.DoubleHyphen); if (At(HtmlSymbolType.DoubleHyphen)) { - var lastDoubleHyphen = CurrentSymbol; - AcceptWhile(s => - { - if (NextIs(HtmlSymbolType.DoubleHyphen)) - { - lastDoubleHyphen = s; - return true; - } - - NextToken(); - EnsureCurrent(); - return false; - }); - - if (At(HtmlSymbolType.Text) && - string.Equals(CurrentSymbol.Content, "-", StringComparison.Ordinal)) - { - // Doing this here to maintain the order of symbols - if (!NextIs(HtmlSymbolType.CloseAngle)) - { - Accept(lastDoubleHyphen); - lastDoubleHyphen = null; - } - - AcceptAndMoveNext(); - } + var lastDoubleHyphen = AcceptAllButLastDoubleHypens(); if (At(HtmlSymbolType.CloseAngle)) { @@ -541,7 +516,6 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy Accept(lastDoubleHyphen); AcceptAndMoveNext(); Output(SpanKindInternal.Markup, AcceptedCharactersInternal.None); - return true; } else if (lastDoubleHyphen != null) @@ -551,8 +525,6 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy } } } - - return false; } else if (CurrentSymbol.Type == HtmlSymbolType.LeftBracket) { @@ -571,6 +543,125 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy return false; } + private HtmlSymbol AcceptAllButLastDoubleHypens() + { + var lastDoubleHyphen = CurrentSymbol; + AcceptWhile(s => + { + if 
(NextIs(HtmlSymbolType.DoubleHyphen)) + { + lastDoubleHyphen = s; + return true; + } + + NextToken(); + EnsureCurrent(); + return false; + }); + + if (At(HtmlSymbolType.Text) && IsDashSymbol(CurrentSymbol)) + { + // Doing this here to maintain the order of symbols + if (!NextIs(HtmlSymbolType.CloseAngle)) + { + Accept(lastDoubleHyphen); + lastDoubleHyphen = null; + } + + AcceptAndMoveNext(); + } + + return lastDoubleHyphen; + } + + private static bool IsDashSymbol(HtmlSymbol symbol) + { + return string.Equals(symbol.Content, "-", StringComparison.Ordinal); + } + + private bool IsHtmlCommentAhead() + { + /* + * From HTML5 Specification, available at http://www.w3.org/TR/html52/syntax.html#comments + * + * Comments must have the following format: + * 1. The string "<!--" + * 2. Optionally, text, with the additional restriction that the text + * 2.1 must not start with the string ">" nor start with the string "->" + * 2.2 nor contain the strings "<!--", "-->", or "--!>" + * 2.3 nor end with the string "<!-". + * 3. The string "-->" + * + * */ + + if (CurrentSymbol.Type != HtmlSymbolType.DoubleHyphen) + { + return false; + } + + // Check condition 2.1 + if (NextIs(HtmlSymbolType.CloseAngle) || NextIs(next => IsDashSymbol(next) && NextIs(HtmlSymbolType.CloseAngle))) + { + return false; + } + + // Check condition 2.2 by looking ahead until the comment is closed or a forbidden sequence is met; condition 2.3 is checked once the closing "-->" is found. + bool isValidComment = false; + LookaheadUntil((s, p) => + { + bool breakLookahead = false; + if (s.Type == HtmlSymbolType.DoubleHyphen) + { + if (NextIs(HtmlSymbolType.CloseAngle)) + { + // We're at the end of a comment. Check condition 2.3 to make sure the text ending is allowed. + isValidComment = !EndsWithSymbolsSequence(p, HtmlSymbolType.OpenAngle, HtmlSymbolType.Bang, HtmlSymbolType.DoubleHyphen); + breakLookahead = true; + } + else if (NextIs(ns => IsDashSymbol(ns) && NextIs(HtmlSymbolType.CloseAngle))) + { + // This is also a valid closing comment case, as the dashes lookup is treated with DoubleHyphen symbols first.
+ isValidComment = true; + breakLookahead = true; + } + else if (NextIs(ns => ns.Type == HtmlSymbolType.Bang && NextIs(HtmlSymbolType.CloseAngle))) + { + isValidComment = false; + breakLookahead = true; + } + } + else if (s.Type == HtmlSymbolType.OpenAngle) + { + if (NextIs(ns => ns.Type == HtmlSymbolType.Bang && NextIs(HtmlSymbolType.DoubleHyphen))) + { + isValidComment = false; + breakLookahead = true; + } + } + + return breakLookahead; + }); + + return isValidComment; + } + + private bool EndsWithSymbolsSequence(IEnumerable symbols, params HtmlSymbolType[] sequenceToMatchWith) + { + int index = sequenceToMatchWith.Length; + foreach (var previousSymbol in symbols) + { + if (index == 0) + { + break; + } + + if (sequenceToMatchWith[--index] != previousSymbol.Type) + return false; + } + + return index == 0; + } + private bool CData() { if (CurrentSymbol.Type == HtmlSymbolType.Text && string.Equals(CurrentSymbol.Content, "cdata", StringComparison.OrdinalIgnoreCase)) diff --git a/src/Microsoft.AspNetCore.Razor.Language/Legacy/TokenizerBackedParser.cs b/src/Microsoft.AspNetCore.Razor.Language/Legacy/TokenizerBackedParser.cs index 6f564eb442..1819407076 100644 --- a/src/Microsoft.AspNetCore.Razor.Language/Legacy/TokenizerBackedParser.cs +++ b/src/Microsoft.AspNetCore.Razor.Language/Legacy/TokenizerBackedParser.cs @@ -109,6 +109,53 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy return symbols[count]; } + /// <summary> + /// Looks forward until the specified condition is met. + /// </summary> + /// <param name="condition">A predicate accepting the symbol being evaluated and the list of symbols which have been looped through.</param> + /// <returns><c>true</c>, if the condition was met. <c>false</c> - if the condition wasn't met and the last symbol has already been processed.</returns> + /// <remarks>The list of previous symbols is passed in the reverse order. So the last processed element will be the first one in the list.</remarks>
+ protected bool LookaheadUntil(Func, bool> condition) + { + if (condition == null) + { + throw new ArgumentNullException(nameof(condition)); + } + + bool matchFound = false; + + // Start the list with the current symbol so that it is put back together with the symbols read during the look-ahead. + var symbols = new List(); + symbols.Add(CurrentSymbol); + + while (true) + { + if (!NextToken()) + { + break; + } + + symbols.Add(CurrentSymbol); + if (condition(CurrentSymbol, symbols.Reverse())) + { + matchFound = true; + break; + } + } + + // Restore Tokenizer's location to where it was pointing before the look-ahead. + for (var i = symbols.Count - 1; i >= 0; i--) + { + PutBack(symbols[i]); + } + + // The PutBacks above will set CurrentSymbol to null. EnsureCurrent will set our CurrentSymbol to the + // next symbol. + EnsureCurrent(); + + return matchFound; + } + protected internal bool NextToken() { PreviousSymbol = CurrentSymbol; diff --git a/test/Microsoft.AspNetCore.Razor.Language.Test/Legacy/HtmlBlockTest.cs b/test/Microsoft.AspNetCore.Razor.Language.Test/Legacy/HtmlBlockTest.cs index f8d4b0fcd9..5d3d46a831 100644 --- a/test/Microsoft.AspNetCore.Razor.Language.Test/Legacy/HtmlBlockTest.cs +++ b/test/Microsoft.AspNetCore.Razor.Language.Test/Legacy/HtmlBlockTest.cs @@ -419,9 +419,7 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy ParseBlockTest( "