Updated HTML comment detection to comply with the HTML5 specification
This commit is contained in:
parent
1796abcbcd
commit
9c3adba40f
|
|
@ -492,7 +492,7 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy
|
||||||
|
|
||||||
if (AcceptAndMoveNext())
|
if (AcceptAndMoveNext())
|
||||||
{
|
{
|
||||||
if (CurrentSymbol.Type == HtmlSymbolType.DoubleHyphen)
|
if (IsHtmlCommentAhead())
|
||||||
{
|
{
|
||||||
using (Context.Builder.StartBlock(BlockKindInternal.HtmlComment))
|
using (Context.Builder.StartBlock(BlockKindInternal.HtmlComment))
|
||||||
{
|
{
|
||||||
|
|
@ -505,32 +505,7 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy
|
||||||
SkipToAndParseCode(HtmlSymbolType.DoubleHyphen);
|
SkipToAndParseCode(HtmlSymbolType.DoubleHyphen);
|
||||||
if (At(HtmlSymbolType.DoubleHyphen))
|
if (At(HtmlSymbolType.DoubleHyphen))
|
||||||
{
|
{
|
||||||
var lastDoubleHyphen = CurrentSymbol;
|
var lastDoubleHyphen = AcceptAllButLastDoubleHypens();
|
||||||
AcceptWhile(s =>
|
|
||||||
{
|
|
||||||
if (NextIs(HtmlSymbolType.DoubleHyphen))
|
|
||||||
{
|
|
||||||
lastDoubleHyphen = s;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
NextToken();
|
|
||||||
EnsureCurrent();
|
|
||||||
return false;
|
|
||||||
});
|
|
||||||
|
|
||||||
if (At(HtmlSymbolType.Text) &&
|
|
||||||
string.Equals(CurrentSymbol.Content, "-", StringComparison.Ordinal))
|
|
||||||
{
|
|
||||||
// Doing this here to maintain the order of symbols
|
|
||||||
if (!NextIs(HtmlSymbolType.CloseAngle))
|
|
||||||
{
|
|
||||||
Accept(lastDoubleHyphen);
|
|
||||||
lastDoubleHyphen = null;
|
|
||||||
}
|
|
||||||
|
|
||||||
AcceptAndMoveNext();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (At(HtmlSymbolType.CloseAngle))
|
if (At(HtmlSymbolType.CloseAngle))
|
||||||
{
|
{
|
||||||
|
|
@ -541,7 +516,6 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy
|
||||||
Accept(lastDoubleHyphen);
|
Accept(lastDoubleHyphen);
|
||||||
AcceptAndMoveNext();
|
AcceptAndMoveNext();
|
||||||
Output(SpanKindInternal.Markup, AcceptedCharactersInternal.None);
|
Output(SpanKindInternal.Markup, AcceptedCharactersInternal.None);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
else if (lastDoubleHyphen != null)
|
else if (lastDoubleHyphen != null)
|
||||||
|
|
@ -551,8 +525,6 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
else if (CurrentSymbol.Type == HtmlSymbolType.LeftBracket)
|
else if (CurrentSymbol.Type == HtmlSymbolType.LeftBracket)
|
||||||
{
|
{
|
||||||
|
|
@ -571,6 +543,125 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Accepts a run of consecutive double-hyphen symbols while holding back the last one, so the caller
/// can decide whether that final "--" closes the comment.
/// </summary>
/// <returns>
/// The held-back double-hyphen symbol, or <c>null</c> when it was already accepted here because it was
/// followed by a single "-" that is not directly followed by a close angle.
/// </returns>
private HtmlSymbol AcceptAllButLastDoubleHypens()
{
    var pendingDoubleHyphen = CurrentSymbol;

    AcceptWhile(symbol =>
    {
        if (!NextIs(HtmlSymbolType.DoubleHyphen))
        {
            // No further double hyphen follows: step over this one without accepting it and stop.
            NextToken();
            EnsureCurrent();
            return false;
        }

        // Another double hyphen follows, so this one is safe to accept.
        pendingDoubleHyphen = symbol;
        return true;
    });

    var atSingleDash = At(HtmlSymbolType.Text) && IsDashSymbol(CurrentSymbol);
    if (!atSingleDash)
    {
        return pendingDoubleHyphen;
    }

    // Accept the held-back symbol before the dash so the symbols stay in source order.
    if (!NextIs(HtmlSymbolType.CloseAngle))
    {
        Accept(pendingDoubleHyphen);
        pendingDoubleHyphen = null;
    }

    AcceptAndMoveNext();

    return pendingDoubleHyphen;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Determines whether the content of <paramref name="symbol"/> is exactly a single "-" character
/// (ordinal comparison).
/// </summary>
private static bool IsDashSymbol(HtmlSymbol symbol) =>
    string.Equals(symbol.Content, "-", StringComparison.Ordinal);
|
||||||
|
|
||||||
|
/// <summary>
/// Looks ahead from the current symbol to decide whether the upcoming symbols form an HTML comment
/// body and terminator. The current symbol is expected to be the "--" of the opening "&lt;!--".
/// </summary>
/// <returns>true when a well-formed comment is ahead; otherwise false.</returns>
private bool IsHtmlCommentAhead()
{
    /*
     * From the HTML5 specification, available at http://www.w3.org/TR/html52/syntax.html#comments
     *
     * Comments must have the following format:
     * 1. The string "<!--"
     * 2. Optionally, text, with the additional restriction that the text
     *      2.1 must not start with the string ">" nor start with the string "->"
     *      2.2 nor contain the strings "<!--", "-->", or "--!>"
     *      2.3 nor end with the string "<!-".
     * 3. The string "-->"
     */

    if (CurrentSymbol.Type != HtmlSymbolType.DoubleHyphen)
    {
        return false;
    }

    // Condition 2.1: the text may not start with ">" or "->".
    if (NextIs(HtmlSymbolType.CloseAngle) ||
        NextIs(next => IsDashSymbol(next) && NextIs(HtmlSymbolType.CloseAngle)))
    {
        return false;
    }

    // Conditions 2.2 and 2.3: scan forward until a terminator or a forbidden sequence is found.
    var isValidComment = false;
    LookaheadUntil((symbol, previousSymbols) =>
    {
        if (symbol.Type == HtmlSymbolType.DoubleHyphen)
        {
            if (NextIs(HtmlSymbolType.CloseAngle))
            {
                // "-->" reached: the comment is valid unless the symbols seen so far end with
                // OpenAngle, Bang, DoubleHyphen (condition 2.3).
                isValidComment = !EndsWithSymbolsSequence(
                    previousSymbols,
                    HtmlSymbolType.OpenAngle,
                    HtmlSymbolType.Bang,
                    HtmlSymbolType.DoubleHyphen);
                return true;
            }

            if (NextIs(next => IsDashSymbol(next) && NextIs(HtmlSymbolType.CloseAngle)))
            {
                // "--" followed by "-" and ">" is also a valid closing, because dashes are matched
                // as DoubleHyphen symbols first.
                isValidComment = true;
                return true;
            }

            if (NextIs(next => next.Type == HtmlSymbolType.Bang && NextIs(HtmlSymbolType.CloseAngle)))
            {
                // "--!>" is not allowed inside a comment.
                isValidComment = false;
                return true;
            }

            return false;
        }

        if (symbol.Type == HtmlSymbolType.OpenAngle &&
            NextIs(next => next.Type == HtmlSymbolType.Bang && NextIs(HtmlSymbolType.DoubleHyphen)))
        {
            // A nested "<!--" is not allowed inside a comment.
            isValidComment = false;
            return true;
        }

        return false;
    });

    return isValidComment;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Checks whether the leading elements of <paramref name="symbols"/> match
/// <paramref name="sequenceToMatchWith"/> read from its last entry to its first.
/// </summary>
/// <param name="symbols">Symbols to inspect; the first element is compared with the last expected type.</param>
/// <param name="sequenceToMatchWith">Expected symbol types, listed in forward order.</param>
/// <returns>
/// true when every expected type was matched; false on the first mismatch or when
/// <paramref name="symbols"/> runs out before the whole sequence is matched.
/// </returns>
private bool EndsWithSymbolsSequence(IEnumerable<HtmlSymbol> symbols, params HtmlSymbolType[] sequenceToMatchWith)
{
    var remaining = sequenceToMatchWith.Length;

    using (var enumerator = symbols.GetEnumerator())
    {
        while (remaining > 0 && enumerator.MoveNext())
        {
            remaining--;
            if (enumerator.Current.Type != sequenceToMatchWith[remaining])
            {
                return false;
            }
        }
    }

    return remaining == 0;
}
|
||||||
|
|
||||||
private bool CData()
|
private bool CData()
|
||||||
{
|
{
|
||||||
if (CurrentSymbol.Type == HtmlSymbolType.Text && string.Equals(CurrentSymbol.Content, "cdata", StringComparison.OrdinalIgnoreCase))
|
if (CurrentSymbol.Type == HtmlSymbolType.Text && string.Equals(CurrentSymbol.Content, "cdata", StringComparison.OrdinalIgnoreCase))
|
||||||
|
|
|
||||||
|
|
@ -109,6 +109,53 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy
|
||||||
return symbols[count];
|
return symbols[count];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Reads symbols ahead of the current one until <paramref name="condition"/> returns true or no more
/// tokens can be read, then puts every symbol read back.
/// </summary>
/// <param name="condition">
/// A predicate receiving the symbol being evaluated and the symbols visited so far.
/// </param>
/// <returns>
/// true if the condition was met; false if the input ran out before the condition was met.
/// </returns>
/// <remarks>
/// The visited symbols are passed in reverse order: the first element is the symbol currently being
/// evaluated and the last is the symbol that was current when the look-ahead started.
/// </remarks>
protected bool LookaheadUntil(Func<TSymbol, IEnumerable<TSymbol>, bool> condition)
{
    if (condition == null)
    {
        throw new ArgumentNullException(nameof(condition));
    }

    // Seed the history with the current symbol so it can be put back along with the rest.
    var visited = new List<TSymbol> { CurrentSymbol };
    var matchFound = false;

    while (NextToken())
    {
        visited.Add(CurrentSymbol);
        if (condition(CurrentSymbol, Enumerable.Reverse(visited)))
        {
            matchFound = true;
            break;
        }
    }

    // Restore the tokenizer's location to where it was before the look-ahead (last read goes back first).
    for (var index = visited.Count - 1; index >= 0; index--)
    {
        PutBack(visited[index]);
    }

    // The PutBacks above will set CurrentSymbol to null. EnsureCurrent will set CurrentSymbol to the
    // next symbol.
    EnsureCurrent();

    return matchFound;
}
|
||||||
|
|
||||||
protected internal bool NextToken()
|
protected internal bool NextToken()
|
||||||
{
|
{
|
||||||
PreviousSymbol = CurrentSymbol;
|
PreviousSymbol = CurrentSymbol;
|
||||||
|
|
|
||||||
|
|
@ -419,9 +419,7 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy
|
||||||
ParseBlockTest(
|
ParseBlockTest(
|
||||||
"<!--<foo>",
|
"<!--<foo>",
|
||||||
new MarkupBlock(
|
new MarkupBlock(
|
||||||
new HtmlCommentBlock(Factory.Markup("<!--").Accepts(AcceptedCharactersInternal.None)),
|
Factory.Markup("<!--<foo>").Accepts(AcceptedCharactersInternal.None)));
|
||||||
Factory.Markup("<foo>").Accepts(AcceptedCharactersInternal.WhiteSpace),
|
|
||||||
Factory.EmptyHtml()));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
[Fact]
|
[Fact]
|
||||||
|
|
|
||||||
|
|
@ -219,8 +219,7 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy
|
||||||
Factory.Markup("Foo Baz"),
|
Factory.Markup("Foo Baz"),
|
||||||
BlockFactory.HtmlCommentBlock("Foo"),
|
BlockFactory.HtmlCommentBlock("Foo"),
|
||||||
Factory.Markup("Bar"),
|
Factory.Markup("Bar"),
|
||||||
new HtmlCommentBlock(Factory.Markup("<!--").Accepts(AcceptedCharactersInternal.None)),
|
Factory.Markup("<!--F> Qux")));
|
||||||
Factory.Markup("F> Qux").Accepts(AcceptedCharactersInternal.WhiteSpace)));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
[Fact]
|
[Fact]
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue