// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.

using System;
|
|
using System.Collections.Generic;
|
|
using System.Text.RegularExpressions;
|
|
|
|
namespace Microsoft.AspNetCore.Rewrite.Internal.ModRewrite
|
|
{
|
|
/// <summary>
/// Tokenizes a mod_rewrite rule, delimited by spaces and tabs.
/// </summary>
public class Tokenizer
{
    private const char Space = ' ';
    private const char Escape = '\\';
    private const char Tab = '\t';
    private const char Quote = '"';

    /// <summary>
    /// Splits a rule into tokens on runs of spaces and tabs. Whitespace that is
    /// backslash-escaped or enclosed in double quotes does not split a token.
    /// Enclosing quotes are removed and escape sequences are unescaped in the
    /// returned tokens.
    /// </summary>
    /// <param name="rule">The rule to tokenize.</param>
    /// <returns>
    /// A list of tokens, or <c>null</c> when <paramref name="rule"/> is null or empty.
    /// </returns>
    /// <exception cref="FormatException">
    /// The rule ends with a dangling escape character or contains an unterminated quote.
    /// </exception>
    public IList<string> Tokenize(string rule)
    {
        // TODO make list of strings a reference to the original rule? (run into problems with escaped spaces).
        if (string.IsNullOrEmpty(rule))
        {
            // Existing contract: a missing rule yields null rather than an empty list.
            return null;
        }

        // NOTE(review): this relies on ParserContext.Capture() returning the text between
        // the last Mark() and the current position - confirm against ParserContext.
        var context = new ParserContext(rule);
        context.Next();

        var tokens = new List<string>();
        context.Mark();
        while (true)
        {
            switch (context.Current)
            {
                case Escape:
                    // Need to progress such that the next character is not evaluated.
                    if (!context.Next())
                    {
                        // Means that a character was not escaped appropriately Ex: "foo\"
                        throw new FormatException($"Invalid escaper character in string: {rule}");
                    }
                    break;
                case Quote:
                    // Everything up to the matching closing quote belongs to the current token.
                    SkipQuotedSection(context, rule);
                    break;
                case Space:
                case Tab:
                    // time to capture!
                    var token = context.Capture();
                    if (!string.IsNullOrEmpty(token))
                    {
                        tokens.Add(token);
                    }

                    // Skip the whole whitespace run and mark the start of the next token.
                    // This must also happen when nothing was captured (leading whitespace);
                    // otherwise the next token start is never marked.
                    do
                    {
                        if (!context.Next())
                        {
                            // At end of string, we can return at this point.
                            RemoveQuotesAndEscapeCharacters(tokens);
                            return tokens;
                        }
                    } while (context.Current == Space || context.Current == Tab);
                    context.Mark();

                    // Step back so that the Next() below lands on the marked character and
                    // the switch gets to inspect it (it may itself be a quote or an escape).
                    context.Back();
                    break;
            }

            if (!context.Next())
            {
                // End of string. Capture.
                break;
            }
        }

        var done = context.Capture();
        if (!string.IsNullOrEmpty(done))
        {
            tokens.Add(done);
        }

        RemoveQuotesAndEscapeCharacters(tokens);
        return tokens;
    }

    // Advances the context from an opening quote to its matching closing quote.
    // A backslash inside the quoted section escapes the following character, so an
    // escaped quote (\") does not terminate the section.
    private static void SkipQuotedSection(ParserContext context, string rule)
    {
        while (true)
        {
            if (!context.Next())
            {
                throw new FormatException($"Mismatched number of quotes: {rule}");
            }

            if (context.Current == Quote)
            {
                return;
            }

            if (context.Current == Escape && !context.Next())
            {
                // The rule ended inside the quoted section, right after a backslash.
                throw new FormatException($"Mismatched number of quotes: {rule}");
            }
        }
    }

    // Removes leading and trailing quotes if they exist, then unescapes the token.
    // This runs at start-up, so we are more forgiving about substrings / new strings.
    // If this is a perf/memory problem, discuss later.
    // NOTE(review): Regex.Unescape also rewrites regex escapes such as "\." to ".";
    // confirm that this is intended for tokens that are regular-expression patterns.
    private static void RemoveQuotesAndEscapeCharacters(IList<string> tokens)
    {
        for (var i = 0; i < tokens.Count; i++)
        {
            tokens[i] = Regex.Unescape(TrimUnescapedQuotes(tokens[i]));
        }
    }

    // Strips leading quotes and unescaped trailing quotes from a token.
    // A trailing quote that is backslash-escaped is literal text and is kept, so that
    // the unescape step can turn \" into ".
    private static string TrimUnescapedQuotes(string token)
    {
        var start = 0;
        while (start < token.Length && token[start] == Quote)
        {
            start++;
        }

        var end = token.Length;
        while (end > start && token[end - 1] == Quote && !IsEscaped(token, end - 1, start))
        {
            end--;
        }

        return token.Substring(start, end - start);
    }

    // Returns true when the character at index is preceded by an odd number of
    // backslashes (looking back no further than start), i.e. it is itself escaped.
    private static bool IsEscaped(string token, int index, int start)
    {
        var backslashes = 0;
        for (var i = index - 1; i >= start && token[i] == Escape; i--)
        {
            backslashes++;
        }

        return backslashes % 2 == 1;
    }
}
|
|
}
|