// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
|
|
|
using System;
|
|
using System.Collections.Generic;
|
|
using System.Diagnostics;
|
|
using System.Globalization;
|
|
|
|
namespace Microsoft.AspNetCore.Razor.Language.Legacy
|
|
{
|
|
internal class CSharpTokenizer : Tokenizer<CSharpSymbol, CSharpSymbolType>
|
|
{
|
|
// Maps the first character of an operator to the handler that finishes classifying it.
// Assigned exactly once, in the constructor, hence readonly.
private readonly Dictionary<char, Func<CSharpSymbolType>> _operatorHandlers;
|
|
|
|
// Lookup from keyword text to its CSharpKeyword value. Keys are compared ordinally
// (case-sensitive); entries are listed alphabetically.
private static readonly Dictionary<string, CSharpKeyword> _keywords = new Dictionary<string, CSharpKeyword>(StringComparer.Ordinal)
{
    ["abstract"] = CSharpKeyword.Abstract,
    ["as"] = CSharpKeyword.As,
    ["await"] = CSharpKeyword.Await,
    ["base"] = CSharpKeyword.Base,
    ["bool"] = CSharpKeyword.Bool,
    ["break"] = CSharpKeyword.Break,
    ["byte"] = CSharpKeyword.Byte,
    ["case"] = CSharpKeyword.Case,
    ["catch"] = CSharpKeyword.Catch,
    ["char"] = CSharpKeyword.Char,
    ["checked"] = CSharpKeyword.Checked,
    ["class"] = CSharpKeyword.Class,
    ["const"] = CSharpKeyword.Const,
    ["continue"] = CSharpKeyword.Continue,
    ["decimal"] = CSharpKeyword.Decimal,
    ["default"] = CSharpKeyword.Default,
    ["delegate"] = CSharpKeyword.Delegate,
    ["do"] = CSharpKeyword.Do,
    ["double"] = CSharpKeyword.Double,
    ["else"] = CSharpKeyword.Else,
    ["enum"] = CSharpKeyword.Enum,
    ["event"] = CSharpKeyword.Event,
    ["explicit"] = CSharpKeyword.Explicit,
    ["extern"] = CSharpKeyword.Extern,
    ["false"] = CSharpKeyword.False,
    ["finally"] = CSharpKeyword.Finally,
    ["fixed"] = CSharpKeyword.Fixed,
    ["float"] = CSharpKeyword.Float,
    ["for"] = CSharpKeyword.For,
    ["foreach"] = CSharpKeyword.Foreach,
    ["goto"] = CSharpKeyword.Goto,
    ["if"] = CSharpKeyword.If,
    ["implicit"] = CSharpKeyword.Implicit,
    ["in"] = CSharpKeyword.In,
    ["int"] = CSharpKeyword.Int,
    ["interface"] = CSharpKeyword.Interface,
    ["internal"] = CSharpKeyword.Internal,
    ["is"] = CSharpKeyword.Is,
    ["lock"] = CSharpKeyword.Lock,
    ["long"] = CSharpKeyword.Long,
    ["namespace"] = CSharpKeyword.Namespace,
    ["new"] = CSharpKeyword.New,
    ["null"] = CSharpKeyword.Null,
    ["object"] = CSharpKeyword.Object,
    ["operator"] = CSharpKeyword.Operator,
    ["out"] = CSharpKeyword.Out,
    ["override"] = CSharpKeyword.Override,
    ["params"] = CSharpKeyword.Params,
    ["private"] = CSharpKeyword.Private,
    ["protected"] = CSharpKeyword.Protected,
    ["public"] = CSharpKeyword.Public,
    ["readonly"] = CSharpKeyword.Readonly,
    ["ref"] = CSharpKeyword.Ref,
    ["return"] = CSharpKeyword.Return,
    ["sbyte"] = CSharpKeyword.Sbyte,
    ["sealed"] = CSharpKeyword.Sealed,
    ["short"] = CSharpKeyword.Short,
    ["sizeof"] = CSharpKeyword.Sizeof,
    ["stackalloc"] = CSharpKeyword.Stackalloc,
    ["static"] = CSharpKeyword.Static,
    ["string"] = CSharpKeyword.String,
    ["struct"] = CSharpKeyword.Struct,
    ["switch"] = CSharpKeyword.Switch,
    ["this"] = CSharpKeyword.This,
    ["throw"] = CSharpKeyword.Throw,
    ["true"] = CSharpKeyword.True,
    ["try"] = CSharpKeyword.Try,
    ["typeof"] = CSharpKeyword.Typeof,
    ["uint"] = CSharpKeyword.Uint,
    ["ulong"] = CSharpKeyword.Ulong,
    ["unchecked"] = CSharpKeyword.Unchecked,
    ["unsafe"] = CSharpKeyword.Unsafe,
    ["ushort"] = CSharpKeyword.Ushort,
    ["using"] = CSharpKeyword.Using,
    ["virtual"] = CSharpKeyword.Virtual,
    ["void"] = CSharpKeyword.Void,
    ["volatile"] = CSharpKeyword.Volatile,
    ["when"] = CSharpKeyword.When,
    ["while"] = CSharpKeyword.While,
};
|
|
|
|
/// <summary>
/// Creates a tokenizer over <paramref name="source"/>, puts it in its start state and
/// builds the operator dispatch table.
/// </summary>
/// <param name="source">The document to tokenize; passed to the base tokenizer.</param>
public CSharpTokenizer(ITextDocument source)
    : base(source)
{
    base.CurrentState = StartState;

    // Each handler is invoked by Operator() after the operator's first character has
    // already been taken, so a handler only inspects what follows.
    _operatorHandlers = new Dictionary<char, Func<CSharpSymbolType>>
    {
        // Operators with dedicated handler methods.
        ['-'] = MinusOperator,
        ['<'] = LessThanOperator,
        ['>'] = GreaterThanOperator,

        // Operators with two possible second characters.
        ['&'] = CreateTwoCharOperatorHandler(CSharpSymbolType.And, '=', CSharpSymbolType.AndAssign, '&', CSharpSymbolType.DoubleAnd),
        ['|'] = CreateTwoCharOperatorHandler(CSharpSymbolType.Or, '=', CSharpSymbolType.OrAssign, '|', CSharpSymbolType.DoubleOr),
        ['+'] = CreateTwoCharOperatorHandler(CSharpSymbolType.Plus, '=', CSharpSymbolType.PlusAssign, '+', CSharpSymbolType.Increment),
        ['='] = CreateTwoCharOperatorHandler(CSharpSymbolType.Assign, '=', CSharpSymbolType.Equals, '>', CSharpSymbolType.GreaterThanEqual),

        // Operators with a single possible second character.
        ['!'] = CreateTwoCharOperatorHandler(CSharpSymbolType.Not, '=', CSharpSymbolType.NotEqual),
        ['%'] = CreateTwoCharOperatorHandler(CSharpSymbolType.Modulo, '=', CSharpSymbolType.ModuloAssign),
        ['*'] = CreateTwoCharOperatorHandler(CSharpSymbolType.Star, '=', CSharpSymbolType.MultiplyAssign),
        [':'] = CreateTwoCharOperatorHandler(CSharpSymbolType.Colon, ':', CSharpSymbolType.DoubleColon),
        ['?'] = CreateTwoCharOperatorHandler(CSharpSymbolType.QuestionMark, '?', CSharpSymbolType.NullCoalesce),
        ['^'] = CreateTwoCharOperatorHandler(CSharpSymbolType.Xor, '=', CSharpSymbolType.XorAssign),

        // Single-character punctuation.
        ['('] = () => CSharpSymbolType.LeftParenthesis,
        [')'] = () => CSharpSymbolType.RightParenthesis,
        ['{'] = () => CSharpSymbolType.LeftBrace,
        ['}'] = () => CSharpSymbolType.RightBrace,
        ['['] = () => CSharpSymbolType.LeftBracket,
        [']'] = () => CSharpSymbolType.RightBracket,
        [','] = () => CSharpSymbolType.Comma,
        [';'] = () => CSharpSymbolType.Semicolon,
        ['~'] = () => CSharpSymbolType.Tilde,
        ['#'] = () => CSharpSymbolType.Hash,
    };
}
|
|
|
|
/// <summary>
/// The initial tokenizer state: <see cref="CSharpTokenizerState.Data"/> as an integer.
/// </summary>
protected override int StartState
{
    get { return (int)CSharpTokenizerState.Data; }
}
|
|
|
|
// Enum-typed view of the base tokenizer's current state; hides the base member
// so that this class can switch on CSharpTokenizerState values.
private new CSharpTokenizerState? CurrentState
{
    get { return (CSharpTokenizerState?)base.CurrentState; }
}
|
|
|
|
/// <summary>
/// Symbol type reported for a Razor comment: <see cref="CSharpSymbolType.RazorComment"/>.
/// </summary>
public override CSharpSymbolType RazorCommentType
{
    get { return CSharpSymbolType.RazorComment; }
}
|
|
|
|
/// <summary>
/// Symbol type reported for a Razor comment transition:
/// <see cref="CSharpSymbolType.RazorCommentTransition"/>.
/// </summary>
public override CSharpSymbolType RazorCommentTransitionType
{
    get { return CSharpSymbolType.RazorCommentTransition; }
}
|
|
|
|
/// <summary>
/// Symbol type reported for the star of a Razor comment delimiter:
/// <see cref="CSharpSymbolType.RazorCommentStar"/>.
/// </summary>
public override CSharpSymbolType RazorCommentStarType
{
    get { return CSharpSymbolType.RazorCommentStar; }
}
|
|
|
|
/// <summary>
/// Invokes the handler method that corresponds to the current tokenizer state.
/// </summary>
/// <returns>
/// The handler's result, or <c>default(StateResult)</c> (after a debug failure) when the
/// state is not one this tokenizer knows.
/// </returns>
protected override StateResult Dispatch()
{
    var state = CurrentState;
    switch (state)
    {
        case CSharpTokenizerState.Data: return Data();
        case CSharpTokenizerState.BlockComment: return BlockComment();
        case CSharpTokenizerState.QuotedCharacterLiteral: return QuotedCharacterLiteral();
        case CSharpTokenizerState.QuotedStringLiteral: return QuotedStringLiteral();
        case CSharpTokenizerState.VerbatimStringLiteral: return VerbatimStringLiteral();
        case CSharpTokenizerState.AfterRazorCommentTransition: return AfterRazorCommentTransition();
        case CSharpTokenizerState.EscapedRazorCommentTransition: return EscapedRazorCommentTransition();
        case CSharpTokenizerState.RazorCommentBody: return RazorCommentBody();
        case CSharpTokenizerState.StarAfterRazorCommentBody: return StarAfterRazorCommentBody();
        case CSharpTokenizerState.AtSymbolAfterRazorCommentBody: return AtSymbolAfterRazorCommentBody();
    }

    // No case matched: the state is missing or unknown.
    Debug.Fail("Invalid TokenizerState");
    return default(StateResult);
}
|
|
|
|
/// <summary>
/// Returns the text of the symbol currently held in the buffer. For the most frequent
/// one-, two- and three-character symbols a constant string is returned instead of
/// asking the base class to build one, which saves allocations.
/// </summary>
/// <param name="type">The type of the symbol being completed.</param>
/// <returns>A constant for the recognized cases; otherwise the base implementation's result.</returns>
protected override string GetSymbolContent(CSharpSymbolType type)
{
    switch (Buffer.Length)
    {
        case 1:
        {
            var only = Buffer[0];
            switch (type)
            {
                case CSharpSymbolType.IntegerLiteral:
                    switch (only)
                    {
                        case '0': return "0";
                        case '1': return "1";
                        case '2': return "2";
                        case '3': return "3";
                        case '4': return "4";
                        case '5': return "5";
                        case '6': return "6";
                        case '7': return "7";
                        case '8': return "8";
                        case '9': return "9";
                    }
                    break;

                case CSharpSymbolType.NewLine:
                    if (only == '\n')
                    {
                        return "\n";
                    }
                    break;

                case CSharpSymbolType.WhiteSpace:
                    if (only == ' ')
                    {
                        return " ";
                    }
                    if (only == '\t')
                    {
                        return "\t";
                    }
                    break;

                case CSharpSymbolType.Minus: return "-";
                case CSharpSymbolType.Not: return "!";
                case CSharpSymbolType.Modulo: return "%";
                case CSharpSymbolType.And: return "&";
                case CSharpSymbolType.LeftParenthesis: return "(";
                case CSharpSymbolType.RightParenthesis: return ")";
                case CSharpSymbolType.Star: return "*";
                case CSharpSymbolType.Comma: return ",";
                case CSharpSymbolType.Dot: return ".";
                case CSharpSymbolType.Slash: return "/";
                case CSharpSymbolType.Colon: return ":";
                case CSharpSymbolType.Semicolon: return ";";
                case CSharpSymbolType.QuestionMark: return "?";
                case CSharpSymbolType.RightBracket: return "]";
                case CSharpSymbolType.LeftBracket: return "[";
                case CSharpSymbolType.Xor: return "^";
                case CSharpSymbolType.LeftBrace: return "{";
                case CSharpSymbolType.Or: return "|";
                case CSharpSymbolType.RightBrace: return "}";
                case CSharpSymbolType.Tilde: return "~";
                case CSharpSymbolType.Plus: return "+";
                case CSharpSymbolType.LessThan: return "<";
                case CSharpSymbolType.Assign: return "=";
                case CSharpSymbolType.GreaterThan: return ">";
                case CSharpSymbolType.Hash: return "#";
                case CSharpSymbolType.Transition: return "@";
            }
            break;
        }

        case 2:
            switch (type)
            {
                case CSharpSymbolType.NewLine: return "\r\n";
                case CSharpSymbolType.Arrow: return "->";
                case CSharpSymbolType.Decrement: return "--";
                case CSharpSymbolType.MinusAssign: return "-=";
                case CSharpSymbolType.NotEqual: return "!=";
                case CSharpSymbolType.ModuloAssign: return "%=";
                case CSharpSymbolType.AndAssign: return "&=";
                case CSharpSymbolType.DoubleAnd: return "&&";
                case CSharpSymbolType.MultiplyAssign: return "*=";
                case CSharpSymbolType.DivideAssign: return "/=";
                case CSharpSymbolType.DoubleColon: return "::";
                case CSharpSymbolType.NullCoalesce: return "??";
                case CSharpSymbolType.XorAssign: return "^=";
                case CSharpSymbolType.OrAssign: return "|=";
                case CSharpSymbolType.DoubleOr: return "||";
                case CSharpSymbolType.PlusAssign: return "+=";
                case CSharpSymbolType.Increment: return "++";
                case CSharpSymbolType.LessThanEqual: return "<=";
                case CSharpSymbolType.LeftShift: return "<<";
                case CSharpSymbolType.Equals: return "==";
                case CSharpSymbolType.GreaterThanEqual:
                    // The '=' operator handler also reports "=>" with this type,
                    // so the first character decides which text to return.
                    return Buffer[0] == '=' ? "=>" : ">=";
                case CSharpSymbolType.RightShift: return ">>";
            }
            break;

        case 3:
            switch (type)
            {
                case CSharpSymbolType.LeftShiftAssign: return "<<=";
                case CSharpSymbolType.RightShiftAssign: return ">>=";
            }
            break;
    }

    // Anything not covered above is left to the base class.
    return base.GetSymbolContent(type);
}
|
|
|
|
/// <summary>
/// Builds a <see cref="CSharpSymbol"/> from the given content, type and errors.
/// </summary>
protected override CSharpSymbol CreateSymbol(string content, CSharpSymbolType type, IReadOnlyList<RazorError> errors)
    => new CSharpSymbol(content, type, errors);
|
|
|
|
/// <summary>
/// Handler for the Data state: classifies the current character and either emits a
/// complete symbol (newline, whitespace, identifier, number, operator, single-line
/// comment) or transitions into a literal / block-comment state.
/// </summary>
private StateResult Data()
{
    var first = CurrentCharacter;

    if (ParserHelpers.IsNewLine(first))
    {
        // CSharp Spec §2.3.1
        TakeCurrent();

        // "\r\n" is emitted as a single two-character newline symbol.
        if (first == '\r' && CurrentCharacter == '\n')
        {
            TakeCurrent();
        }

        return Stay(EndSymbol(CSharpSymbolType.NewLine));
    }

    if (ParserHelpers.IsWhitespace(first))
    {
        // CSharp Spec §2.3.3
        TakeUntil(c => !ParserHelpers.IsWhitespace(c));
        return Stay(EndSymbol(CSharpSymbolType.WhiteSpace));
    }

    if (IsIdentifierStart(first))
    {
        return Identifier();
    }

    if (char.IsDigit(first))
    {
        return NumericLiteral();
    }

    switch (first)
    {
        case '@':
            return AtSymbol();

        case '\'':
            TakeCurrent();
            return Transition(CSharpTokenizerState.QuotedCharacterLiteral);

        case '"':
            TakeCurrent();
            return Transition(CSharpTokenizerState.QuotedStringLiteral);

        case '.':
            // A dot directly followed by a digit starts a real literal such as ".5".
            if (char.IsDigit(Peek()))
            {
                return RealLiteral();
            }
            return Stay(Single(CSharpSymbolType.Dot));

        case '/':
            TakeCurrent();
            switch (CurrentCharacter)
            {
                case '/':
                    TakeCurrent();
                    return SingleLineComment();

                case '*':
                    TakeCurrent();
                    return Transition(CSharpTokenizerState.BlockComment);

                case '=':
                    TakeCurrent();
                    return Stay(EndSymbol(CSharpSymbolType.DivideAssign));

                default:
                    return Stay(EndSymbol(CSharpSymbolType.Slash));
            }

        default:
            return Stay(EndSymbol(Operator()));
    }
}
|
|
|
|
/// <summary>
/// Handles an '@': takes it, then decides from the following character whether this is
/// a verbatim string (<c>@"</c>), a Razor comment start (<c>@*</c>), a possibly escaped
/// transition (<c>@@</c>), or a plain transition symbol.
/// </summary>
private StateResult AtSymbol()
{
    TakeCurrent();

    switch (CurrentCharacter)
    {
        case '"':
            TakeCurrent();
            return Transition(CSharpTokenizerState.VerbatimStringLiteral);

        case '*':
            return Transition(
                CSharpTokenizerState.AfterRazorCommentTransition,
                EndSymbol(CSharpSymbolType.RazorCommentTransition));

        case '@':
            // Could be escaped comment transition
            return Transition(
                CSharpTokenizerState.EscapedRazorCommentTransition,
                EndSymbol(CSharpSymbolType.Transition));

        default:
            return Stay(EndSymbol(CSharpSymbolType.Transition));
    }
}
|
|
|
|
/// <summary>
/// Handler for the state entered after an '@' that is followed by another '@':
/// takes the current character, emits it as a transition symbol and returns to Data.
/// </summary>
private StateResult EscapedRazorCommentTransition()
{
    TakeCurrent();
    var transitionSymbol = EndSymbol(CSharpSymbolType.Transition);
    return Transition(CSharpTokenizerState.Data, transitionSymbol);
}
|
|
|
|
/// <summary>
/// Takes the current character and, if an operator handler is registered for it,
/// lets that handler classify the operator (possibly taking more characters).
/// </summary>
/// <returns>
/// The handler's symbol type, or <see cref="CSharpSymbolType.Unknown"/> when no handler
/// is registered for the character.
/// </returns>
private CSharpSymbolType Operator()
{
    var leading = CurrentCharacter;
    TakeCurrent();

    Func<CSharpSymbolType> classify;
    return _operatorHandlers.TryGetValue(leading, out classify)
        ? classify()
        : CSharpSymbolType.Unknown;
}
|
|
|
|
/// <summary>
/// Classifies an operator that began with '&lt;': <c>&lt;=</c> when an '=' follows
/// (which is then taken), otherwise a plain <c>&lt;</c>.
/// </summary>
private CSharpSymbolType LessThanOperator()
{
    if (CurrentCharacter != '=')
    {
        return CSharpSymbolType.LessThan;
    }

    TakeCurrent();
    return CSharpSymbolType.LessThanEqual;
}
|
|
|
|
/// <summary>
/// Classifies an operator that began with '&gt;': <c>&gt;=</c> when an '=' follows
/// (which is then taken), otherwise a plain <c>&gt;</c>.
/// </summary>
private CSharpSymbolType GreaterThanOperator()
{
    if (CurrentCharacter != '=')
    {
        return CSharpSymbolType.GreaterThan;
    }

    TakeCurrent();
    return CSharpSymbolType.GreaterThanEqual;
}
|
|
|
|
/// <summary>
/// Classifies an operator that began with '-': <c>-&gt;</c>, <c>--</c> or <c>-=</c> when the
/// matching second character follows (which is then taken), otherwise a plain <c>-</c>.
/// </summary>
private CSharpSymbolType MinusOperator()
{
    CSharpSymbolType combined;
    switch (CurrentCharacter)
    {
        case '>':
            combined = CSharpSymbolType.Arrow;
            break;
        case '-':
            combined = CSharpSymbolType.Decrement;
            break;
        case '=':
            combined = CSharpSymbolType.MinusAssign;
            break;
        default:
            // Nothing to combine with; leave the next character untouched.
            return CSharpSymbolType.Minus;
    }

    TakeCurrent();
    return combined;
}
|
|
|
|
/// <summary>
/// Creates a handler for an operator that is either a single character or that
/// character followed by <paramref name="second"/>.
/// </summary>
/// <param name="typeIfOnlyFirst">Type returned when <paramref name="second"/> does not follow.</param>
/// <param name="second">The character that completes the two-character form.</param>
/// <param name="typeIfBoth">Type returned when <paramref name="second"/> follows (it is taken).</param>
private Func<CSharpSymbolType> CreateTwoCharOperatorHandler(CSharpSymbolType typeIfOnlyFirst, char second, CSharpSymbolType typeIfBoth)
{
    return () =>
    {
        if (CurrentCharacter != second)
        {
            return typeIfOnlyFirst;
        }

        TakeCurrent();
        return typeIfBoth;
    };
}
|
|
|
|
/// <summary>
/// Creates a handler for an operator that is either a single character or that
/// character followed by one of two possible second characters.
/// </summary>
/// <param name="typeIfOnlyFirst">Type returned when neither option follows.</param>
/// <param name="option1">First candidate second character (checked first).</param>
/// <param name="typeIfOption1">Type returned when <paramref name="option1"/> follows.</param>
/// <param name="option2">Second candidate second character.</param>
/// <param name="typeIfOption2">Type returned when <paramref name="option2"/> follows.</param>
private Func<CSharpSymbolType> CreateTwoCharOperatorHandler(CSharpSymbolType typeIfOnlyFirst, char option1, CSharpSymbolType typeIfOption1, char option2, CSharpSymbolType typeIfOption2)
{
    return () =>
    {
        var next = CurrentCharacter;
        if (next != option1 && next != option2)
        {
            return typeIfOnlyFirst;
        }

        // One of the two options matched; take it. option1 wins if both are equal.
        TakeCurrent();
        return next == option1 ? typeIfOption1 : typeIfOption2;
    };
}
|
|
|
|
/// <summary>
/// Handler for the verbatim string state: takes characters up to the next double
/// quote. A doubled quote is an escaped quote and keeps the tokenizer in this state;
/// a single quote ends the literal. Reaching end of file first records an
/// unterminated-string error.
/// </summary>
private StateResult VerbatimStringLiteral()
{
    TakeUntil(c => c == '"');

    if (CurrentCharacter != '"')
    {
        if (EndOfFile)
        {
            CurrentErrors.Add(
                new RazorError(
                    LegacyResources.ParseError_Unterminated_String_Literal,
                    CurrentStart,
                    length: 1 /* end of file */));
        }

        return Transition(CSharpTokenizerState.Data, EndSymbol(CSharpSymbolType.StringLiteral));
    }

    TakeCurrent();
    if (CurrentCharacter == '"')
    {
        // Stay in the literal, this is an escaped "
        TakeCurrent();
        return Stay();
    }

    return Transition(CSharpTokenizerState.Data, EndSymbol(CSharpSymbolType.StringLiteral));
}
|
|
|
|
/// <summary>
/// Handler for the character-literal state: a quoted literal delimited by a single quote.
/// </summary>
private StateResult QuotedCharacterLiteral()
{
    return QuotedLiteral('\'', CSharpSymbolType.CharacterLiteral);
}
|
|
|
|
/// <summary>
/// Handler for the regular string-literal state: a quoted literal delimited by a double quote.
/// </summary>
private StateResult QuotedStringLiteral()
{
    return QuotedLiteral('\"', CSharpSymbolType.StringLiteral);
}
|
|
|
|
/// <summary>
/// Shared handler for character and regular string literals. Takes characters up to a
/// backslash, the closing quote, or a newline, then either continues after an escape,
/// records an unterminated-literal error, or takes the closing quote.
/// </summary>
/// <param name="quote">The delimiter that closes the literal.</param>
/// <param name="literalType">The symbol type to emit for the finished literal.</param>
private StateResult QuotedLiteral(char quote, CSharpSymbolType literalType)
{
    TakeUntil(c => c == '\\' || c == quote || ParserHelpers.IsNewLine(c));

    if (CurrentCharacter == '\\')
    {
        TakeCurrent(); // the backslash itself

        // An escaped quote or backslash must be taken here so it is not
        // mistaken for the end of the literal or the start of another escape.
        var escaped = CurrentCharacter;
        if (escaped == quote || escaped == '\\')
        {
            TakeCurrent();
        }

        return Stay();
    }

    var unterminated = EndOfFile || ParserHelpers.IsNewLine(CurrentCharacter);
    if (unterminated)
    {
        CurrentErrors.Add(
            new RazorError(
                LegacyResources.ParseError_Unterminated_String_Literal,
                CurrentStart,
                length: 1 /* " */));
    }
    else
    {
        // Neither escape, newline nor end of file: this is the closing quote.
        TakeCurrent();
    }

    return Transition(CSharpTokenizerState.Data, EndSymbol(literalType));
}
|
|
|
|
// CSharp Spec §2.3.2
/// <summary>
/// Handler for the block-comment state: takes characters up to the next '*'. A
/// following '/' closes the comment and returns to Data; otherwise the tokenizer stays
/// in this state. End of file records an unterminated-comment error and emits what
/// was collected as a comment.
/// </summary>
private StateResult BlockComment()
{
    TakeUntil(c => c == '*');

    if (EndOfFile)
    {
        CurrentErrors.Add(
            new RazorError(
                LegacyResources.ParseError_BlockComment_Not_Terminated,
                CurrentStart,
                length: 1 /* end of file */));

        return Transition(CSharpTokenizerState.Data, EndSymbol(CSharpSymbolType.Comment));
    }

    if (CurrentCharacter != '*')
    {
        return Stay();
    }

    TakeCurrent();
    if (CurrentCharacter != '/')
    {
        // A lone '*' inside the comment; keep scanning on the next dispatch.
        return Stay();
    }

    TakeCurrent();
    return Transition(CSharpTokenizerState.Data, EndSymbol(CSharpSymbolType.Comment));
}
|
|
|
|
// CSharp Spec §2.3.2
/// <summary>
/// Finishes a single-line comment: takes everything up to (not including) the next
/// newline character and emits it as a comment symbol.
/// </summary>
private StateResult SingleLineComment()
{
    TakeUntil(ch => ParserHelpers.IsNewLine(ch));
    var comment = EndSymbol(CSharpSymbolType.Comment);
    return Stay(comment);
}
|
|
|
|
// CSharp Spec §2.4.4
/// <summary>
/// Starts a numeric literal: a hexadecimal literal when the text begins with the hex
/// prefix, otherwise a decimal (integer or real) literal.
/// </summary>
private StateResult NumericLiteral()
{
    // The C# grammar allows the hex prefix to be written "0x" or "0X", so the prefix
    // is matched case-insensitively. Previously "0X1F" was lexed as the integer "0"
    // followed by the identifier "X1F".
    // NOTE(review): relies on TakeAll honouring caseSensitive: false for a lowercase
    // pattern - confirm against the base Tokenizer implementation.
    if (TakeAll("0x", caseSensitive: false))
    {
        return HexLiteral();
    }

    return DecimalLiteral();
}
|
|
|
|
/// <summary>
/// Finishes a hexadecimal literal whose prefix has already been taken: takes the hex
/// digits and an optional integer suffix, then emits an integer-literal symbol.
/// </summary>
private StateResult HexLiteral()
{
    TakeUntil(ch => !IsHexDigit(ch));
    TakeIntegerSuffix();

    var literal = EndSymbol(CSharpSymbolType.IntegerLiteral);
    return Stay(literal);
}
|
|
|
|
/// <summary>
/// Tokenizes a literal that starts with decimal digits. After the digits, a '.'
/// followed by a digit, an exponent marker, or a real-type suffix turns it into a real
/// literal; otherwise it is an integer literal with an optional integer suffix.
/// </summary>
private StateResult DecimalLiteral()
{
    TakeUntil(c => !Char.IsDigit(c));

    // "1.5" - but not "1.ToString()", where the dot is a member access.
    if (CurrentCharacter == '.' && Char.IsDigit(Peek()))
    {
        return RealLiteral();
    }

    var continuesAsReal =
        CurrentCharacter == 'E' ||
        CurrentCharacter == 'e' ||
        IsRealLiteralSuffix(CurrentCharacter);
    if (continuesAsReal)
    {
        return RealLiteralExponentPart();
    }

    TakeIntegerSuffix();
    return Stay(EndSymbol(CSharpSymbolType.IntegerLiteral));
}
|
|
|
|
/// <summary>
/// Finishes a real literal: takes an optional exponent (<c>E</c>/<c>e</c>, optional sign,
/// digits) and an optional real-type suffix, then emits a real-literal symbol.
/// </summary>
private StateResult RealLiteralExponentPart()
{
    var marker = CurrentCharacter;
    if (marker == 'E' || marker == 'e')
    {
        TakeCurrent();

        var sign = CurrentCharacter;
        if (sign == '+' || sign == '-')
        {
            TakeCurrent();
        }

        TakeUntil(c => !Char.IsDigit(c));
    }

    if (IsRealLiteralSuffix(CurrentCharacter))
    {
        TakeCurrent();
    }

    return Stay(EndSymbol(CSharpSymbolType.RealLiteral));
}
|
|
|
|
// CSharp Spec §2.4.4.3
/// <summary>
/// Tokenizes the fractional part of a real literal. Must be called with the current
/// character on the '.' and a digit right after it; takes the dot and the digits, then
/// hands over to <see cref="RealLiteralExponentPart"/>.
/// </summary>
private StateResult RealLiteral()
{
    AssertCurrent('.');
    TakeCurrent();

    // Callers only get here after peeking a digit behind the dot.
    Debug.Assert(Char.IsDigit(CurrentCharacter));
    TakeUntil(ch => !Char.IsDigit(ch));

    return RealLiteralExponentPart();
}
|
|
|
|
/// <summary>
/// Takes an optional integer-type suffix at the current position: <c>u</c> or <c>l</c>
/// in either case, optionally followed by the other letter (so <c>ul</c> / <c>lu</c>).
/// Takes nothing when the current character is neither letter.
/// </summary>
private void TakeIntegerSuffix()
{
    var first = Char.ToLowerInvariant(CurrentCharacter);
    if (first != 'u' && first != 'l')
    {
        return;
    }

    TakeCurrent();

    // Only the complementary letter may follow: 'l' after 'u', 'u' after 'l'.
    var partner = first == 'u' ? 'l' : 'u';
    if (Char.ToLowerInvariant(CurrentCharacter) == partner)
    {
        TakeCurrent();
    }
}
|
|
|
|
// CSharp Spec §2.4.2
/// <summary>
/// Tokenizes an identifier or keyword starting at the current character. The text is
/// looked up in the keyword table; a match yields a keyword symbol carrying the
/// <see cref="CSharpKeyword"/>, anything else an identifier symbol. The buffer and the
/// pending error list are cleared once the symbol has been built.
/// </summary>
private StateResult Identifier()
{
    Debug.Assert(IsIdentifierStart(CurrentCharacter));
    TakeCurrent();
    TakeUntil(c => !IsIdentifierPart(c));

    CSharpSymbol result = null;
    if (HaveContent)
    {
        var text = Buffer.ToString();

        CSharpKeyword keyword;
        var isKeyword = _keywords.TryGetValue(text, out keyword);
        var symbolType = isKeyword ? CSharpSymbolType.Keyword : CSharpSymbolType.Identifier;

        result = new CSharpSymbol(text, symbolType)
        {
            Keyword = isKeyword ? (CSharpKeyword?)keyword : null,
        };

        Buffer.Clear();
        CurrentErrors.Clear();
    }

    return Stay(result);
}
|
|
|
|
/// <summary>
/// Transitions to <paramref name="state"/> without emitting a symbol.
/// </summary>
private StateResult Transition(CSharpTokenizerState state)
    => Transition((int)state, result: null);
|
|
|
|
/// <summary>
/// Transitions to <paramref name="state"/>, passing <paramref name="result"/> along as
/// the emitted symbol.
/// </summary>
private StateResult Transition(CSharpTokenizerState state, CSharpSymbol result)
    => Transition((int)state, result);
|
|
|
|
/// <summary>
/// Returns whether <paramref name="character"/> may begin an identifier: a letter,
/// an underscore, or a character in the Unicode LetterNumber category.
/// </summary>
private static bool IsIdentifierStart(char character)
{
    if (character == '_' || char.IsLetter(character))
    {
        return true;
    }

    return CharUnicodeInfo.GetUnicodeCategory(character) == UnicodeCategory.LetterNumber;
}
|
|
|
|
/// <summary>
/// Returns whether <paramref name="character"/> may appear after the first character
/// of an identifier: anything that may start one, a digit, or a character accepted by
/// <see cref="IsIdentifierPartByUnicodeCategory"/>.
/// </summary>
private static bool IsIdentifierPart(char character)
{
    if (IsIdentifierStart(character))
    {
        return true;
    }

    return char.IsDigit(character) || IsIdentifierPartByUnicodeCategory(character);
}
|
|
|
|
/// <summary>
/// Returns whether <paramref name="character"/> is a real-literal type suffix:
/// <c>F</c>, <c>D</c> or <c>M</c> in either case.
/// </summary>
private static bool IsRealLiteralSuffix(char character)
{
    switch (character)
    {
        case 'F':
        case 'f':
        case 'D':
        case 'd':
        case 'M':
        case 'm':
            return true;
        default:
            return false;
    }
}
|
|
|
|
/// <summary>
/// Returns whether <paramref name="character"/> belongs to one of the Unicode
/// categories Mn, Mc, Pc or Cf.
/// </summary>
private static bool IsIdentifierPartByUnicodeCategory(char character)
{
    switch (CharUnicodeInfo.GetUnicodeCategory(character))
    {
        case UnicodeCategory.NonSpacingMark:        // Mn
        case UnicodeCategory.SpacingCombiningMark:  // Mc
        case UnicodeCategory.ConnectorPunctuation:  // Pc
        case UnicodeCategory.Format:                // Cf
            return true;
        default:
            return false;
    }
}
|
|
|
|
/// <summary>
/// Returns whether <paramref name="value"/> is an ASCII hexadecimal digit
/// (<c>0-9</c>, <c>A-F</c> or <c>a-f</c>).
/// </summary>
private static bool IsHexDigit(char value)
{
    var isDecimalDigit = value >= '0' && value <= '9';
    var isUpperHex = value >= 'A' && value <= 'F';
    var isLowerHex = value >= 'a' && value <= 'f';

    return isDecimalDigit || isUpperHex || isLowerHex;
}
|
|
|
|
// States of the C# tokenizer. The first group is specific to C#; its values are the
// ones the compiler would assign implicitly, spelled out here.
private enum CSharpTokenizerState
{
    Data = 0,
    BlockComment = 1,
    QuotedCharacterLiteral = 2,
    QuotedStringLiteral = 3,
    VerbatimStringLiteral = 4,

    // Razor Comments - need to be the same for HTML and CSharp
    AfterRazorCommentTransition = RazorCommentTokenizerState.AfterRazorCommentTransition,
    EscapedRazorCommentTransition = RazorCommentTokenizerState.EscapedRazorCommentTransition,
    RazorCommentBody = RazorCommentTokenizerState.RazorCommentBody,
    StarAfterRazorCommentBody = RazorCommentTokenizerState.StarAfterRazorCommentBody,
    AtSymbolAfterRazorCommentBody = RazorCommentTokenizerState.AtSymbolAfterRazorCommentBody,
}
|
|
}
|
|
}
|