// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.

using System;
using System.Collections.Generic;
using System.Text.RegularExpressions;

namespace Microsoft.AspNetCore.Rewrite.Internal.ModRewrite
{
    /// <summary>
    /// Tokenizes a mod_rewrite rule, delimited by spaces.
    /// </summary>
    public class Tokenizer
    {
        private const char Space = ' ';
        private const char Escape = '\\';
        private const char Tab = '\t';
        private const char Quote = '"';

        /// <summary>
        /// Splits a rule into tokens on spaces and tabs. Whitespace that is preceded by a
        /// backslash or that sits between a pair of double quotes does not split a token.
        /// </summary>
        /// <param name="rule">The rule to tokenize.</param>
        /// <returns>
        /// A list of tokens with leading/trailing double quotes trimmed and escape sequences
        /// converted by <see cref="Regex.Unescape(string)"/>, or <c>null</c> when
        /// <paramref name="rule"/> is <c>null</c> or empty.
        /// </returns>
        /// <exception cref="FormatException">
        /// The rule ends with a dangling escape character, or a double quote is never closed.
        /// </exception>
        public IList<string> Tokenize(string rule)
        {
            // TODO make list of strings a reference to the original rule? (run into problems with escaped spaces).
            // TODO handle "s and probably replace \ character with no slash.
            if (string.IsNullOrEmpty(rule))
            {
                // Existing contract: no input yields null rather than an empty list.
                return null;
            }

            var context = new ParserContext(rule);
            // Move onto the first character; the rule is non-empty, so the result is not checked.
            context.Next();

            var tokens = new List<string>();
            // Remember where the first token starts.
            context.Mark();

            while (true)
            {
                switch (context.Current)
                {
                    case Escape:
                        // Need to progress such that the next character is not evaluated.
                        if (!context.Next())
                        {
                            // Means that a character was not escaped appropriately Ex: "foo\"
                            throw new FormatException($"Invalid escaper character in string: {rule}");
                        }
                        break;

                    case Quote:
                        // Ignore all characters until the next quote is hit, so that whitespace
                        // inside the quotes never reaches the Space/Tab case below.
                        // Note: this inner scan only looks for the quote character; an escape
                        // character inside the quoted region is not treated specially here.
                        if (!context.Next())
                        {
                            throw new FormatException($"Mismatched number of quotes: {rule}");
                        }
                        while (context.Current != Quote)
                        {
                            if (!context.Next())
                            {
                                throw new FormatException($"Mismatched number of quotes: {rule}");
                            }
                        }
                        break;

                    case Space:
                    case Tab:
                        // Time to capture!
                        // NOTE(review): what Capture() returns (and whether the mark survives) when
                        // no characters were consumed depends on ParserContext, which is not visible
                        // here - confirm how rules with leading whitespace are handled.
                        var token = context.Capture();
                        if (!string.IsNullOrEmpty(token))
                        {
                            tokens.Add(token);

                            // Skip the whole run of whitespace separating this token from the next.
                            do
                            {
                                if (!context.Next())
                                {
                                    // At end of string, we can return at this point.
                                    RemoveQuotesAndEscapeCharacters(tokens);
                                    return tokens;
                                }
                            } while (context.Current == Space || context.Current == Tab);

                            // Current is now the first character of the next token: mark it, then
                            // step back so that the Next() call below lands on it again and the
                            // switch gets to inspect it (presumably Back() moves one character
                            // backwards - verify against ParserContext).
                            context.Mark();
                            context.Back();
                        }
                        break;
                }

                if (!context.Next())
                {
                    // End of string. Capture.
                    break;
                }
            }

            // Whatever follows the last separator is the final token.
            var done = context.Capture();
            if (!string.IsNullOrEmpty(done))
            {
                tokens.Add(done);
            }

            RemoveQuotesAndEscapeCharacters(tokens);
            return tokens;
        }

        // Trims leading and trailing double quotes from every token and then converts the
        // escape sequences it contains via Regex.Unescape, replacing each entry in place.
        // This runs on start-up, so it is more forgiving towards substrings / new strings.
        // If this is a perf/memory problem, discuss later.
        private static void RemoveQuotesAndEscapeCharacters(IList<string> tokens)
        {
            for (var i = 0; i < tokens.Count; i++)
            {
                var token = tokens[i];
                var trimmed = token.Trim('\"');
                tokens[i] = Regex.Unescape(trimmed);
            }
        }
    }
}