// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Text;

namespace Microsoft.AspNetCore.Razor.Evolution.Legacy
{
    /// <summary>
    /// Base class for tokenizers driven by an integer-keyed state machine. Derived classes supply the
    /// start state, the <see cref="Dispatch"/> routine that runs the current state, and a factory for symbols.
    /// Characters taken from <see cref="Source"/> accumulate in <see cref="Buffer"/> until a symbol is ended.
    /// </summary>
    /// <typeparam name="TSymbol">The symbol type produced by the tokenizer.</typeparam>
    /// <typeparam name="TSymbolType">The value type used to classify symbols.</typeparam>
    // NOTE(review): the generic argument on SymbolBase is inferred from how TSymbol/TSymbolType are used in
    // this class; confirm the arity against the SymbolBase declaration.
    internal abstract partial class Tokenizer<TSymbol, TSymbolType> : ITokenizer
        where TSymbolType : struct
        where TSymbol : SymbolBase<TSymbolType>
    {
        /// <summary>
        /// Creates a tokenizer reading from <paramref name="source"/> and begins the first symbol at the
        /// source's current location.
        /// </summary>
        /// <param name="source">The document to read characters from.</param>
        /// <exception cref="ArgumentNullException"><paramref name="source"/> is <c>null</c>.</exception>
        protected Tokenizer(ITextDocument source)
        {
            if (source == null)
            {
                throw new ArgumentNullException(nameof(source));
            }

            Source = source;
            Buffer = new StringBuilder();
            CurrentErrors = new List<RazorError>();
            StartSymbol();
        }

        /// <summary>Errors collected for the symbol currently being built; cleared by <see cref="StartSymbol"/>.</summary>
        protected List<RazorError> CurrentErrors { get; }

        /// <summary>The state that <see cref="Reset"/> puts the machine into.</summary>
        protected abstract int StartState { get; }

        /// <summary>The state to run next, or <c>null</c> when the machine is stopped.</summary>
        protected int? CurrentState { get; set; }

        /// <summary>The result of the most recent state executed by <see cref="Turn"/>.</summary>
        protected TSymbol CurrentSymbol { get; private set; }

        public ITextDocument Source { get; private set; }

        /// <summary>Characters taken so far for the symbol currently being built.</summary>
        protected StringBuilder Buffer { get; private set; }

        /// <summary><c>true</c> when <see cref="Source"/> has no more characters (its Peek returns -1).</summary>
        protected bool EndOfFile
        {
            get { return Source.Peek() == -1; }
        }

        public abstract TSymbolType RazorCommentStarType { get; }

        public abstract TSymbolType RazorCommentType { get; }

        public abstract TSymbolType RazorCommentTransitionType { get; }

        /// <summary><c>true</c> when at least one character has been taken into <see cref="Buffer"/>.</summary>
        protected bool HaveContent
        {
            get { return Buffer.Length > 0; }
        }

        /// <summary>The character at the current position, or <c>'\0'</c> at end of file.</summary>
        protected char CurrentCharacter
        {
            get
            {
                var peek = Source.Peek();
                return peek == -1 ? '\0' : (char)peek;
            }
        }

        protected SourceLocation CurrentLocation
        {
            get { return Source.Location; }
        }

        /// <summary>The location recorded by the most recent call to <see cref="StartSymbol"/>.</summary>
        protected SourceLocation CurrentStart { get; private set; }

        /// <summary>Creates the concrete symbol for the given start location, content, type and errors.</summary>
        protected abstract TSymbol CreateSymbol(
            SourceLocation start,
            string content,
            TSymbolType type,
            IReadOnlyList<RazorError> errors);

        /// <summary>Runs the state identified by <see cref="CurrentState"/> and returns its result.</summary>
        protected abstract StateResult Dispatch();

        ISymbol ITokenizer.NextSymbol()
        {
            return NextSymbol();
        }

        /// <summary>
        /// Produces the next symbol, or <c>null</c> when the source is at end of file or the machine stops.
        /// </summary>
        public virtual TSymbol NextSymbol()
        {
            // Pre-Condition: Buffer should be empty at the start of Next()
            Debug.Assert(Buffer.Length == 0);
            StartSymbol();

            if (EndOfFile)
            {
                return null;
            }

            var symbol = Turn();

            // Post-Condition: Buffer should be empty at the end of Next()
            Debug.Assert(Buffer.Length == 0);

            return symbol;
        }

        /// <summary>
        /// Runs states until one yields a symbol or the machine enters the stop state (<c>null</c>).
        /// Returns the symbol, or the default value when the machine is or becomes stopped.
        /// </summary>
        protected virtual TSymbol Turn()
        {
            if (CurrentState != null)
            {
                // Run until we get into the stop state or have a result.
                do
                {
                    var next = Dispatch();

                    CurrentState = next.State;
                    CurrentSymbol = next.Result;
                }
                while (CurrentState != null && CurrentSymbol == null);

                if (CurrentState == null)
                {
                    return default(TSymbol); // Terminated
                }

                return CurrentSymbol;
            }

            return default(TSymbol);
        }

        /// <summary>Puts the machine back into <see cref="StartState"/>.</summary>
        public void Reset()
        {
            CurrentState = StartState;
        }

        /// <summary>
        /// Returns a result indicating that the machine should stop executing and return null output.
        /// </summary>
        protected StateResult Stop()
        {
            return default(StateResult);
        }

        /// <summary>
        /// Returns a result indicating that this state has no output and the machine should immediately invoke the specified state
        /// </summary>
        /// <remarks>
        /// By returning no output, the state machine will invoke the next state immediately, before returning
        /// control to the caller of <see cref="NextSymbol"/>
        /// </remarks>
        protected StateResult Transition(int state)
        {
            return new StateResult(state, result: null);
        }

        /// <summary>
        /// Returns a result containing the specified output and indicating that the next call to
        /// <see cref="NextSymbol"/> should invoke the provided state.
        /// </summary>
        protected StateResult Transition(int state, TSymbol result)
        {
            return new StateResult(state, result);
        }

        /// <summary>
        /// Same as <see cref="Transition(int)"/>, taking the target as a <see cref="RazorCommentTokenizerState"/>.
        /// </summary>
        protected StateResult Transition(RazorCommentTokenizerState state)
        {
            return new StateResult((int)state, result: null);
        }

        /// <summary>
        /// Same as <see cref="Transition(int, TSymbol)"/>, taking the target as a <see cref="RazorCommentTokenizerState"/>.
        /// </summary>
        protected StateResult Transition(RazorCommentTokenizerState state, TSymbol result)
        {
            return new StateResult((int)state, result);
        }

        /// <summary>
        /// Returns a result indicating that this state has no output and the machine should remain in this state
        /// </summary>
        /// <remarks>
        /// By returning no output, the state machine will re-invoke the current state again before returning
        /// control to the caller of <see cref="NextSymbol"/>
        /// </remarks>
        protected StateResult Stay()
        {
            return new StateResult(CurrentState, result: null);
        }

        /// <summary>
        /// Returns a result containing the specified output and indicating that the next call to
        /// <see cref="NextSymbol"/> should re-invoke the current state.
        /// </summary>
        protected StateResult Stay(TSymbol result)
        {
            return new StateResult(CurrentState, result);
        }

        /// <summary>Takes the current character and ends a symbol of the given type.</summary>
        protected TSymbol Single(TSymbolType type)
        {
            TakeCurrent();
            return EndSymbol(type);
        }

        /// <summary>
        /// Begins a new symbol: empties <see cref="Buffer"/> and <see cref="CurrentErrors"/> and records the
        /// current location as <see cref="CurrentStart"/>.
        /// </summary>
        protected void StartSymbol()
        {
            Buffer.Clear();
            CurrentStart = CurrentLocation;
            CurrentErrors.Clear();
        }

        /// <summary>Ends the current symbol using <see cref="CurrentStart"/> as its start location.</summary>
        protected TSymbol EndSymbol(TSymbolType type)
        {
            return EndSymbol(CurrentStart, type);
        }

        /// <summary>
        /// Creates a symbol from the buffered content and collected errors, then starts a new symbol.
        /// Returns <c>null</c> when nothing has been buffered.
        /// </summary>
        protected TSymbol EndSymbol(SourceLocation start, TSymbolType type)
        {
            TSymbol sym = null;
            if (HaveContent)
            {
                // Perf: Don't allocate a new errors array unless necessary.
                var errors = CurrentErrors.Count == 0
                    ? RazorError.EmptyArray
                    : CurrentErrors.ToArray();

                sym = CreateSymbol(start, Buffer.ToString(), type, errors);
            }

            // Always reset, even when no symbol was produced.
            StartSymbol();
            return sym;
        }

        /// <summary>
        /// Takes characters until <paramref name="predicate"/> accepts the current character or the source ends.
        /// </summary>
        /// <returns><c>true</c> if the source is not at end of file when taking stops; otherwise <c>false</c>.</returns>
        protected bool TakeUntil(Func<char, bool> predicate)
        {
            // Take all the characters up to the end character
            while (!EndOfFile && !predicate(CurrentCharacter))
            {
                TakeCurrent();
            }

            // Why did we end?
            return !EndOfFile;
        }

        /// <summary>Appends the current character to <see cref="Buffer"/> and advances; does nothing at end of file.</summary>
        protected void TakeCurrent()
        {
            if (EndOfFile)
            {
                return;
            } // No-op

            Buffer.Append(CurrentCharacter);
            MoveNext();
        }

        /// <summary>Advances the source by one character without buffering it.</summary>
        protected void MoveNext()
        {
            Source.Read();
        }

        /// <summary>
        /// Takes <paramref name="expected"/> in full if the upcoming characters match it; otherwise takes nothing.
        /// </summary>
        protected bool TakeAll(string expected, bool caseSensitive)
        {
            return Lookahead(expected, takeIfMatch: true, caseSensitive: caseSensitive);
        }

        /// <summary>
        /// Returns the character following the current one (<c>'\0'</c> if there is none). The source position
        /// is restored when the lookahead token is disposed, because the lookahead is never accepted.
        /// </summary>
        protected char Peek()
        {
            using (var lookahead = BeginLookahead(Source))
            {
                MoveNext();
                return CurrentCharacter;
            }
        }

        /// <summary>
        /// State entered after a Razor comment transition: expects <c>'*'</c>, emits it as a
        /// <see cref="RazorCommentStarType"/> symbol and moves on to the comment body.
        /// </summary>
        protected StateResult AfterRazorCommentTransition()
        {
            if (CurrentCharacter != '*')
            {
                // We've been moved since last time we were asked for a symbol... reset the state
                return Transition(StartState);
            }

            AssertCurrent('*');
            TakeCurrent();
            return Transition(RazorCommentTokenizerState.RazorCommentBody, EndSymbol(RazorCommentStarType));
        }

        /// <summary>
        /// State that consumes the comment body up to a closing <c>"*@"</c> sequence or end of file.
        /// </summary>
        protected StateResult RazorCommentBody()
        {
            TakeUntil(c => c == '*');
            if (CurrentCharacter == '*')
            {
                if (Peek() == '@')
                {
                    // Found the terminator; emit any buffered body before handling the '*'.
                    if (HaveContent)
                    {
                        return Transition(
                            RazorCommentTokenizerState.StarAfterRazorCommentBody,
                            EndSymbol(RazorCommentType));
                    }
                    else
                    {
                        return Transition(RazorCommentTokenizerState.StarAfterRazorCommentBody);
                    }
                }
                else
                {
                    // A '*' that is not followed by '@' is part of the body.
                    TakeCurrent();
                    return Stay();
                }
            }

            // No '*' at the current position: emit what was buffered and return to the start state.
            return Transition(StartState, EndSymbol(RazorCommentType));
        }

        /// <summary>State that emits the <c>'*'</c> of the closing sequence.</summary>
        protected StateResult StarAfterRazorCommentBody()
        {
            AssertCurrent('*');
            TakeCurrent();
            return Transition(
                RazorCommentTokenizerState.AtSymbolAfterRazorCommentBody,
                EndSymbol(RazorCommentStarType));
        }

        /// <summary>State that emits the <c>'@'</c> of the closing sequence and returns to the start state.</summary>
        protected StateResult AtSymbolAfterRazorCommentBody()
        {
            AssertCurrent('@');
            TakeCurrent();
            return Transition(StartState, EndSymbol(RazorCommentTransitionType));
        }

        /// <summary>
        /// Checks whether the upcoming characters match <paramref name="expected"/>, optionally taking them.
        /// Internal for unit testing
        /// </summary>
        /// <param name="expected">The text to look for; an empty string never matches.</param>
        /// <param name="takeIfMatch">
        /// When <c>true</c>, a match is taken into <see cref="Buffer"/> and the position advances past it.
        /// When <c>false</c>, the position is restored even on a match.
        /// </param>
        /// <param name="caseSensitive">When <c>false</c>, characters are compared after <see cref="char.ToLowerInvariant(char)"/>.</param>
        /// <returns><c>true</c> if every character of <paramref name="expected"/> matched; otherwise <c>false</c>.</returns>
        internal bool Lookahead(string expected, bool takeIfMatch, bool caseSensitive)
        {
            Func<char, char> filter = c => c;
            if (!caseSensitive)
            {
                filter = char.ToLowerInvariant;
            }

            // Cheap rejection on the first character, before any buffer snapshot or lookahead is set up.
            if (expected.Length == 0 || filter(CurrentCharacter) != filter(expected[0]))
            {
                return false;
            }

            // Capture the current buffer content in case we have to backtrack
            string oldBuffer = null;
            if (takeIfMatch)
            {
                oldBuffer = Buffer.ToString();
            }

            using (var lookahead = BeginLookahead(Source))
            {
                for (int i = 0; i < expected.Length; i++)
                {
                    if (filter(CurrentCharacter) != filter(expected[i]))
                    {
                        if (takeIfMatch)
                        {
                            // Clear the buffer and put the old buffer text back
                            Buffer.Clear();
                            Buffer.Append(oldBuffer);
                        }

                        // Return without accepting lookahead (thus rejecting it)
                        return false;
                    }

                    if (takeIfMatch)
                    {
                        TakeCurrent();
                    }
                    else
                    {
                        MoveNext();
                    }
                }

                if (takeIfMatch)
                {
                    lookahead.Accept();
                }
            }

            return true;
        }

        /// <summary>Debug-only check that the current character is <paramref name="current"/>.</summary>
        [Conditional("DEBUG")]
        internal void AssertCurrent(char current)
        {
            Debug.Assert(
                CurrentCharacter == current,
                "CurrentCharacter Assumption violated",
                "Assumed that the current character would be {0}, but it is actually {1}",
                current,
                CurrentCharacter);
        }

        /// <summary>State identifiers for the Razor comment states, numbered from 1000.</summary>
        protected enum RazorCommentTokenizerState
        {
            AfterRazorCommentTransition = 1000,
            EscapedRazorCommentTransition,
            RazorCommentBody,
            StarAfterRazorCommentBody,
            AtSymbolAfterRazorCommentBody,
        }

        /// <summary>
        /// The outcome of running one state: the state to run next (<c>null</c> to stop) and the symbol
        /// produced, if any.
        /// </summary>
        protected struct StateResult
        {
            public StateResult(int? state, TSymbol result)
            {
                State = state;
                Result = result;
            }

            public int? State { get; }

            public TSymbol Result { get; }
        }

        /// <summary>Starts a lookahead that remembers the buffer's current position.</summary>
        private static LookaheadToken BeginLookahead(ITextBuffer buffer)
        {
            return new LookaheadToken(buffer);
        }

        /// <summary>
        /// Remembers a buffer position on construction and restores it on <see cref="Dispose"/> unless
        /// <see cref="Accept"/> was called.
        /// </summary>
        private struct LookaheadToken : IDisposable
        {
            private readonly ITextBuffer _buffer;
            private readonly int _position;

            private bool _accepted;

            public LookaheadToken(ITextBuffer buffer)
            {
                _buffer = buffer;
                _position = buffer.Position;

                _accepted = false;
            }

            /// <summary>Keeps the buffer at its advanced position when the token is disposed.</summary>
            public void Accept()
            {
                _accepted = true;
            }

            public void Dispose()
            {
                if (!_accepted)
                {
                    _buffer.Position = _position;
                }
            }
        }
    }
}