aspnetcore/src/Microsoft.AspNetCore.Rewrite/Internal/ModRewrite/Tokenizer.cs

116 lines
4.3 KiB
C#

// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
using System;
using System.Collections.Generic;
using System.Text.RegularExpressions;
namespace Microsoft.AspNetCore.Rewrite.Internal.ModRewrite
{
/// <summary>
/// Tokenizes a mod_rewrite rule into tokens delimited by spaces and tabs.
/// </summary>
/// <remarks>
/// A space or tab does not end a token when it directly follows a backslash or
/// when it appears between a pair of double quotes.
/// </remarks>
public class Tokenizer
{
    private const char Space = ' ';
    private const char Escape = '\\';
    private const char Tab = '\t';
    private const char Quote = '"';

    /// <summary>
    /// Splits a rule on unescaped, unquoted spaces and tabs and returns the resulting tokens.
    /// Runs of consecutive whitespace (including leading whitespace) act as a single separator.
    /// </summary>
    /// <param name="rule">The rule to tokenize.</param>
    /// <returns>
    /// A list of tokens with surrounding double quotes trimmed and escape sequences unescaped,
    /// or <c>null</c> when <paramref name="rule"/> is <c>null</c> or empty.
    /// </returns>
    /// <exception cref="FormatException">
    /// The rule ends with a dangling escape character, or an opening double quote is never closed.
    /// </exception>
    public IList<string> Tokenize(string rule)
    {
        // TODO make list of strings a reference to the original rule? (run into problems with escaped spaces).
        // TODO handle "s and probably replace \ character with no slash.
        if (string.IsNullOrEmpty(rule))
        {
            // Existing contract: callers receive null (not an empty list) for a null/empty rule.
            return null;
        }

        // NOTE(review): assumes ParserContext.Mark() records the current position and
        // Capture() returns the text from that mark up to the current position - confirm
        // against ParserContext, which is declared outside this file.
        var context = new ParserContext(rule);
        context.Next();

        var tokens = new List<string>();
        context.Mark();
        while (true)
        {
            switch (context.Current)
            {
                case Escape:
                    // Step over the escaped character so that it is never evaluated as a delimiter or quote.
                    if (!context.Next())
                    {
                        // Means that a character was not escaped appropriately Ex: "foo\"
                        throw new FormatException($"Invalid escaper character in string: {rule}");
                    }
                    break;

                case Quote:
                    // Ignore all characters until the next quote is hit.
                    if (!context.Next())
                    {
                        throw new FormatException($"Mismatched number of quotes: {rule}");
                    }
                    while (context.Current != Quote)
                    {
                        if (!context.Next())
                        {
                            throw new FormatException($"Mismatched number of quotes: {rule}");
                        }
                    }
                    break;

                case Space:
                case Tab:
                    // A delimiter ends the current token. The token is empty only when the rule
                    // starts with whitespace; nothing is added in that case.
                    var token = context.Capture();
                    if (!string.IsNullOrEmpty(token))
                    {
                        tokens.Add(token);
                    }

                    // Always skip the whole whitespace run and re-mark the start of the next token,
                    // even when the captured token was empty. Previously this only happened for a
                    // non-empty token, so a rule with leading whitespace never got a fresh mark.
                    do
                    {
                        if (!context.Next())
                        {
                            // At end of string, we can return at this point.
                            RemoveQuotesAndEscapeCharacters(tokens);
                            return tokens;
                        }
                    } while (IsDelimiter(context.Current));

                    context.Mark();
                    // Step back one position so the Next() below lands on the first character
                    // of the new token and it is evaluated by the switch.
                    context.Back();
                    break;
            }

            if (!context.Next())
            {
                // End of string. Capture the trailing token below.
                break;
            }
        }

        var done = context.Capture();
        if (!string.IsNullOrEmpty(done))
        {
            tokens.Add(done);
        }

        RemoveQuotesAndEscapeCharacters(tokens);
        return tokens;
    }

    // Returns true for the characters that separate tokens.
    private static bool IsDelimiter(char value)
    {
        return value == Space || value == Tab;
    }

    // Trims leading and trailing double quotes from every token and then unescapes it, in place.
    // This is on start-up, so it is more forgiving towards substrings / new strings.
    // If this is a perf/memory problem, discuss later.
    // NOTE(review): Regex.Unescape is documented to throw ArgumentException for an escape
    // sequence it does not recognize; that exception is not handled here.
    private static void RemoveQuotesAndEscapeCharacters(IList<string> tokens)
    {
        for (var i = 0; i < tokens.Count; i++)
        {
            var trimmed = tokens[i].Trim('\"');
            tokens[i] = Regex.Unescape(trimmed);
        }
    }
}
}