Quickly fail the UTF-8 validation if the first byte is already invalid

This commit is contained in:
Kai Ruhnau 2017-04-08 20:05:55 +02:00 committed by Andrew Stanton-Nurse
parent 841ceb24b6
commit 4c183b4d00
3 changed files with 22 additions and 6 deletions

View File

@ -12,6 +12,9 @@ namespace Microsoft.Extensions.WebSockets.Internal
public class Utf8Validator
{
// Table of UTF-8 code point widths. '0' indicates an invalid first byte.
// 0x80 - 0xBF are the continuation bytes and invalid as first byte.
// 0xC0 - 0xC1 can only start overlong encodings of ASCII characters.
// 0xF5 - 0xFF would encode code points larger than the Unicode limit (0x10FFFF).
private static readonly byte[] _utf8Width = new byte[256]
{
/* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0F */
@ -26,10 +29,10 @@ namespace Microsoft.Extensions.WebSockets.Internal
/* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x9F */
/* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xAF */
/* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xBF */
/* 0xC0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0xCF */
/* 0xC0 */ 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0xCF */
/* 0xD0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0xDF */
/* 0xE0 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* 0xEF */
/* 0xF0 */ 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, /* 0xFF */
/* 0xF0 */ 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xFF */
};
// Table of masks used to extract the code point bits from the first byte. Indexed by (width - 1)

View File

@ -90,12 +90,27 @@ namespace Microsoft.Extensions.WebSockets.Internal.Tests
// '\u0800' (3 byte char) encoded with 4 bytes
[InlineData(new byte[] { 0xF0, 0x80, 0xA0, 0x80 })]
// Code point larger than what is allowed
[InlineData(new byte[] { 0xF5, 0x80, 0x80, 0x80 })]
public void InvalidSingleFramePayloads(byte[] payload)
{
    // The payload is handed over as one frame with fin: true;
    // the test expects the validator to report it as invalid.
    var utf8 = new Utf8Validator();
    var frame = ReadableBuffer.Create(payload);
    var accepted = utf8.ValidateUtf8Frame(frame, fin: true);

    Assert.False(accepted);
}
// Each payload is a single lead byte passed with fin: false.
// The test expects validation to fail on that byte alone.
[Theory]
[InlineData(new byte[] { 0xC0 })] // lead byte of an overlong ASCII encoding
[InlineData(new byte[] { 0xC1 })] // lead byte of an overlong ASCII encoding
[InlineData(new byte[] { 0xF5 })] // lead byte beyond the Unicode limit
public void InvalidMultiByteSequencesByFirstByte(byte[] payload)
{
    var utf8 = new Utf8Validator();
    var frame = ReadableBuffer.Create(payload);
    var accepted = utf8.ValidateUtf8Frame(frame, fin: false);

    Assert.False(accepted);
}
[Theory]
// Continuation byte as first byte of code point
@ -113,8 +128,7 @@ namespace Microsoft.Extensions.WebSockets.Internal.Tests
// Overlong Encoding
// 'H' (1 byte char) encoded with 2, 3 and 4 bytes
[InlineData(new byte[] { 0xC1 }, new byte[] { 0x88 })]
// 'H' (1 byte char) encoded with 3 and 4 bytes
[InlineData(new byte[] { 0xE0 }, new byte[] { 0x81, 0x88 })]
[InlineData(new byte[] { 0xF0 }, new byte[] { 0x80, 0x81, 0x88 })]

View File

@ -175,8 +175,7 @@ namespace Microsoft.Extensions.WebSockets.Internal.Tests
// Overlong Encoding
// 'H' (1 byte char) encoded with 2, 3 and 4 bytes
[InlineData(new byte[] { 0xC1 }, new byte[] { 0x88 })]
// 'H' (1 byte char) encoded with 3 and 4 bytes
[InlineData(new byte[] { 0xE0 }, new byte[] { 0x81, 0x88 })]
[InlineData(new byte[] { 0xF0 }, new byte[] { 0x80, 0x81, 0x88 })]