aspnetcore/src/Microsoft.AspNetCore.Rewrite/Internal/ModRewrite/Tokenizer.cs

82 lines
2.9 KiB
C#

// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
using System;
using System.Collections.Generic;
namespace Microsoft.AspNetCore.Rewrite.Internal.ModRewrite
{
/// <summary>
/// Tokenizes a mod_rewrite rule into tokens delimited by spaces or tabs.
/// </summary>
public class Tokenizer
{
    private const char Space = ' ';
    private const char Escape = '\\';
    private const char Tab = '\t';

    /// <summary>
    /// Splits a rule into tokens separated by runs of spaces or tabs. The character that
    /// follows a backslash is never treated as a delimiter, and whitespace at the start
    /// of the rule is skipped rather than corrupting the first token.
    /// </summary>
    /// <param name="rule">The rule to tokenize.</param>
    /// <returns>
    /// The tokens in order of appearance, or <c>null</c> when <paramref name="rule"/>
    /// is null or empty.
    /// </returns>
    /// <exception cref="FormatException">
    /// The rule ends with an escape character that has nothing to escape, e.g. "foo\".
    /// </exception>
    public List<string> Tokenize(string rule)
    {
        // TODO make list of strings a reference to the original rule? (run into problems with escaped spaces).
        // TODO handle quotes, and probably strip the escaping backslash from the produced tokens.
        if (string.IsNullOrEmpty(rule))
        {
            // Kept as null (not an empty list) so existing callers that test for null keep working.
            return null;
        }

        var context = new ParserContext(rule);
        var tokens = new List<string>();

        // Step onto the first character and remember it as the start of the first token.
        // NOTE(review): this relies on ParserContext.Capture() returning the text between the
        // last Mark() and the current position - confirm against ParserContext.
        context.Next();
        context.Mark();

        do
        {
            if (context.Current == Escape)
            {
                // Step over the escaped character so it is never evaluated as a delimiter.
                if (!context.Next())
                {
                    // Nothing follows the escape character. Ex: "foo\"
                    throw new FormatException($"Invalid escaper character in string: {rule}");
                }
            }
            else if (IsWhitespace(context.Current))
            {
                // A delimiter ends the token that is currently being read.
                var token = context.Capture();
                if (!string.IsNullOrEmpty(token))
                {
                    tokens.Add(token);
                }

                // Always consume the whole delimiter run and re-mark, even when the capture
                // was empty (the rule started with whitespace). Previously the mark was only
                // re-established after a non-empty token, so an indented rule lost track of
                // where its first token began.
                while (IsWhitespace(context.Current))
                {
                    if (!context.Next())
                    {
                        // Only delimiters remained; everything has been captured.
                        return tokens;
                    }
                }

                // Now on the first character of the next token: mark it, then step back one
                // so the loop's Next() lands on it again and it goes through the escape check.
                context.Mark();
                context.Back();
            }
        }
        while (context.Next());

        // End of input: whatever follows the last mark is the final token.
        var lastToken = context.Capture();
        if (!string.IsNullOrEmpty(lastToken))
        {
            tokens.Add(lastToken);
        }

        return tokens;
    }

    /// <summary>
    /// Returns true when <paramref name="c"/> is one of the token delimiters (space or tab).
    /// </summary>
    private static bool IsWhitespace(char c)
    {
        return c == Space || c == Tab;
    }
}
}