Quickly fail the UTF-8 validation if the first byte is already invalid
This commit is contained in:
parent
841ceb24b6
commit
4c183b4d00
|
|
@ -12,6 +12,9 @@ namespace Microsoft.Extensions.WebSockets.Internal
|
|||
public class Utf8Validator
|
||||
{
|
||||
// Table of UTF-8 code point widths. '0' indicates an invalid first byte.
|
||||
// 0x80 - 0xBF are the continuation bytes and invalid as first byte.
|
||||
// 0xC0 - 0xC1 could only start overlong 2-byte encodings of ASCII characters and are therefore invalid.
|
||||
// 0xF5 - 0xFF would start sequences encoding code points larger than the Unicode limit (0x10FFFF), or are not valid lead bytes at all.
|
||||
private static readonly byte[] _utf8Width = new byte[256]
|
||||
{
|
||||
/* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0F */
|
||||
|
|
@ -26,10 +29,10 @@ namespace Microsoft.Extensions.WebSockets.Internal
|
|||
/* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x9F */
|
||||
/* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xAF */
|
||||
/* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xBF */
|
||||
/* 0xC0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0xCF */
|
||||
/* 0xC0 */ 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0xCF */
|
||||
/* 0xD0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0xDF */
|
||||
/* 0xE0 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* 0xEF */
|
||||
/* 0xF0 */ 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, /* 0xFF */
|
||||
/* 0xF0 */ 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xFF */
|
||||
};
|
||||
|
||||
// Table of masks used to extract the code point bits from the first byte. Indexed by (width - 1)
|
||||
|
|
|
|||
|
|
@ -90,12 +90,27 @@ namespace Microsoft.Extensions.WebSockets.Internal.Tests
|
|||
|
||||
// '\u0800' (3 byte char) encoded with 4 bytes
|
||||
[InlineData(new byte[] { 0xF0, 0x80, 0xA0, 0x80 })]
|
||||
|
||||
// Code point larger than what is allowed
|
||||
[InlineData(new byte[] { 0xF5, 0x80, 0x80, 0x80 })]
|
||||
public void InvalidSingleFramePayloads(byte[] payload)
|
||||
{
|
||||
var validator = new Utf8Validator();
|
||||
Assert.False(validator.ValidateUtf8Frame(ReadableBuffer.Create(payload), fin: true));
|
||||
}
|
||||
|
||||
[Theory]
|
||||
|
||||
[InlineData(new byte[] { 0xC0 })] // overlong encoding of ASCII
|
||||
[InlineData(new byte[] { 0xC1 })] // overlong encoding of ASCII
|
||||
[InlineData(new byte[] { 0xF5 })] // larger than the unicode limit
|
||||
public void InvalidMultiByteSequencesByFirstByte(byte[] payload)
|
||||
{
|
||||
var validator = new Utf8Validator();
|
||||
Assert.False(validator.ValidateUtf8Frame(ReadableBuffer.Create(payload), fin: false));
|
||||
}
|
||||
|
||||
|
||||
[Theory]
|
||||
|
||||
// Continuation byte as first byte of code point
|
||||
|
|
@ -113,8 +128,7 @@ namespace Microsoft.Extensions.WebSockets.Internal.Tests
|
|||
|
||||
// Overlong Encoding
|
||||
|
||||
// 'H' (1 byte char) encoded with 2, 3 and 4 bytes
|
||||
[InlineData(new byte[] { 0xC1 }, new byte[] { 0x88 })]
|
||||
// 'H' (1 byte char) encoded with 3 and 4 bytes
|
||||
[InlineData(new byte[] { 0xE0 }, new byte[] { 0x81, 0x88 })]
|
||||
[InlineData(new byte[] { 0xF0 }, new byte[] { 0x80, 0x81, 0x88 })]
|
||||
|
||||
|
|
|
|||
|
|
@ -175,8 +175,7 @@ namespace Microsoft.Extensions.WebSockets.Internal.Tests
|
|||
|
||||
// Overlong Encoding
|
||||
|
||||
// 'H' (1 byte char) encoded with 2, 3 and 4 bytes
|
||||
[InlineData(new byte[] { 0xC1 }, new byte[] { 0x88 })]
|
||||
// 'H' (1 byte char) encoded with 3 and 4 bytes
|
||||
[InlineData(new byte[] { 0xE0 }, new byte[] { 0x81, 0x88 })]
|
||||
[InlineData(new byte[] { 0xF0 }, new byte[] { 0x80, 0x81, 0x88 })]
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue