104 lines
4.1 KiB
C#
104 lines
4.1 KiB
C#
using System;
|
|
using System.Globalization;
|
|
using System.IO;
|
|
using System.Linq;
|
|
using System.Text;
|
|
using System.Text.RegularExpressions;
|
|
|
|
namespace UnicodeTablesGenerator
{
    /// <summary>
    /// This program outputs the 'UnicodeRanges.generated.txt' and
    /// 'UnicodeRangesTests.generated.txt' source files.
    /// </summary>
    /// <remarks>
    /// The generated files require some hand-tweaking. For instance, you'll need
    /// to remove surrogates and private use blocks. The files can then be merged
    /// into the *.generated.cs files as appropriate.
    /// </remarks>
    internal class Program
    {
        // Input: Blocks.txt from the UCD, read from the current working directory.
        private const string _inputFileName = "Blocks.txt";

        // Outputs, written to the current working directory.
        private const string _runtimeOutputFileName = "UnicodeRanges.generated.txt";
        private const string _testsOutputFileName = "UnicodeRangesTests.generated.txt";

        // Template for one generated property plus its backing field. Placeholders:
        //   {0} = block name as written in Blocks.txt
        //   {1} = first code point (four hex digits)
        //   {2} = last code point (four hex digits)
        //   {3} = property name (block name with non-alphanumeric chars removed)
        //   {4} = backing field name (property name with field casing applied)
        // The text is emitted verbatim, including its leading and trailing newline.
        private const string _codePointFiltersGeneratedFormat = @"
/// <summary>
/// A <see cref=""UnicodeRange""/> corresponding to the '{0}' Unicode block (U+{1}..U+{2}).
/// </summary>
/// <remarks>
/// See http://www.unicode.org/charts/PDF/U{1}.pdf for the full set of characters in this block.
/// </remarks>
public static UnicodeRange {3} => Volatile.Read(ref _{4}) ?? CreateRange(ref _{4}, first: '\u{1}', last: '\u{2}');
private static UnicodeRange _{4};
";

        // Template for one generated test case. Placeholders:
        //   {0} = property name, {1} = first code point, {2} = last code point
        private const string _codePointFiltersTestsGeneratedFormat = @"[InlineData('\u{1}', '\u{2}', nameof(UnicodeRanges.{0}))]";

        // Matches lines of the form "XXXX..XXXX; Block name". Both code points must be
        // exactly four uppercase hex digits, which is what the '\uXXXX' literals in the
        // templates require; ranges written with more digits never match and are skipped.
        private static readonly Regex _blockLineRegex =
            new Regex(@"^(?<startCode>[0-9A-F]{4})\.\.(?<endCode>[0-9A-F]{4}); (?<blockName>.+)$");

        /// <summary>
        /// Reads 'Blocks.txt' from the current directory and writes the two generated files.
        /// </summary>
        private static void Main()
        {
            // The input file should be Blocks.txt from the UCD corresponding to the
            // version of the Unicode spec we're consuming.
            // More info: http://www.unicode.org/reports/tr44/
            // Latest Blocks.txt: http://www.unicode.org/Public/UCD/latest/ucd/Blocks.txt

            StringBuilder runtimeCodeBuilder = new StringBuilder();
            StringBuilder testCodeBuilder = new StringBuilder();

            AppendGeneratedCode(File.ReadAllLines(_inputFileName), runtimeCodeBuilder, testCodeBuilder);

            File.WriteAllText(_runtimeOutputFileName, runtimeCodeBuilder.ToString());
            File.WriteAllText(_testsOutputFileName, testCodeBuilder.ToString());
        }

        /// <summary>
        /// Appends generated runtime and test code for every block definition in <paramref name="lines"/>.
        /// </summary>
        /// <param name="lines">The lines of Blocks.txt; lines that are not block definitions are ignored.</param>
        /// <param name="runtimeCodeBuilder">Receives one property/field pair per block.</param>
        /// <param name="testCodeBuilder">Receives one InlineData line per block.</param>
        private static void AppendGeneratedCode(string[] lines, StringBuilder runtimeCodeBuilder, StringBuilder testCodeBuilder)
        {
            foreach (string line in lines)
            {
                // We only care about lines of the form "XXXX..XXXX; Block name".
                // Regex.Match never returns null, so Success is the only check needed.
                Match match = _blockLineRegex.Match(line);
                if (!match.Success)
                {
                    continue;
                }

                string startCode = match.Groups["startCode"].Value;
                string endCode = match.Groups["endCode"].Value;
                string blockName = match.Groups["blockName"].Value;
                string blockNameAsProperty = RemoveAllNonAlphanumeric(blockName);
                string blockNameAsField = WithDotNetFieldCasing(blockNameAsProperty);

                runtimeCodeBuilder.AppendFormat(CultureInfo.InvariantCulture, _codePointFiltersGeneratedFormat,
                    blockName, startCode, endCode, blockNameAsProperty, blockNameAsField);

                testCodeBuilder.AppendFormat(CultureInfo.InvariantCulture, _codePointFiltersTestsGeneratedFormat,
                    blockNameAsProperty, startCode, endCode);
                testCodeBuilder.AppendLine();
            }
        }

        /// <summary>
        /// Returns <paramref name="blockName"/> with every character other than
        /// ASCII letters (A-Z, a-z) and ASCII digits (0-9) removed.
        /// </summary>
        /// <param name="blockName">The block name, e.g. "Latin-1 Supplement".</param>
        /// <returns>The filtered name, e.g. "Latin1Supplement".</returns>
        private static string RemoveAllNonAlphanumeric(string blockName)
        {
            return new String(blockName.Where(IsAsciiLetterOrDigit).ToArray());
        }

        // True for A-Z, a-z and 0-9 only; Char.IsLetterOrDigit is deliberately not
        // used because it would also accept non-ASCII letters and digits.
        private static bool IsAsciiLetterOrDigit(char c)
        {
            return ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || ('0' <= c && c <= '9');
        }

        /// <summary>
        /// Converts a PascalCase name to field casing by lowercasing its leading run of
        /// non-lowercase characters, e.g. "BasicLatin" becomes "basicLatin" and
        /// "CJKUnifiedIdeographs" becomes "cjkUnifiedIdeographs".
        /// </summary>
        /// <param name="input">The name to convert.</param>
        /// <returns>
        /// The converted name. If <paramref name="input"/> contains no lowercase
        /// character, the whole string is lowercased.
        /// </returns>
        private static string WithDotNetFieldCasing(string input)
        {
            char[] chars = input.ToCharArray();

            // Locate the first lowercase character (chars.Length if there is none).
            int firstLower = 0;
            while (firstLower < chars.Length && !Char.IsLower(chars[firstLower]))
            {
                firstLower++;
            }

            // Everything before the first lowercase character is lowercased, except
            // that the character immediately before it keeps its original casing when
            // it is not at the front of the string: it starts the next word
            // ("CJKUnified" -> "cjkUnified", not "cjkunified").
            int lowerCount = firstLower;
            if (firstLower < chars.Length && firstLower > 1)
            {
                lowerCount = firstLower - 1;
            }

            for (int i = 0; i < lowerCount; i++)
            {
                chars[i] = Char.ToLowerInvariant(chars[i]);
            }

            return new String(chars);
        }
    }
}
|