diff --git a/src/Microsoft.Extensions.WebSockets.Internal/Utf8Validator.cs b/src/Microsoft.Extensions.WebSockets.Internal/Utf8Validator.cs index 5bd5dd76fb..06eed6f4bd 100644 --- a/src/Microsoft.Extensions.WebSockets.Internal/Utf8Validator.cs +++ b/src/Microsoft.Extensions.WebSockets.Internal/Utf8Validator.cs @@ -12,6 +12,9 @@ namespace Microsoft.Extensions.WebSockets.Internal public class Utf8Validator { // Table of UTF-8 code point widths. '0' indicates an invalid first byte. + // 0x80 - 0xBF are the continuation bytes and invalid as first byte. + // 0xC0 - 0xC1 are overlong encodings of ASCII characters + // 0xF5 - 0xFF encode numbers that are larger than the Unicode limit (0x10FFFF) private static readonly byte[] _utf8Width = new byte[256] { /* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0F */ @@ -26,10 +29,10 @@ namespace Microsoft.Extensions.WebSockets.Internal /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x9F */ /* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xAF */ /* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xBF */ - /* 0xC0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0xCF */ + /* 0xC0 */ 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0xCF */ /* 0xD0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0xDF */ /* 0xE0 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* 0xEF */ - /* 0xF0 */ 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, /* 0xFF */ + /* 0xF0 */ 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xFF */ }; // Table of masks used to extract the code point bits from the first byte. Indexed by (width - 1) diff --git a/test/Microsoft.Extensions.WebSockets.Internal.Tests/Utf8ValidatorTests.cs b/test/Microsoft.Extensions.WebSockets.Internal.Tests/Utf8ValidatorTests.cs index 27e9c9bbef..a3d2d731e0 100644 --- a/test/Microsoft.Extensions.WebSockets.Internal.Tests/Utf8ValidatorTests.cs +++ b/test/Microsoft.Extensions.WebSockets.Internal.Tests/Utf8ValidatorTests.cs @@ -90,12 +90,27 @@ namespace Microsoft.Extensions.WebSockets.Internal.Tests // '\u0800' (3 byte char) encoded with 4 bytes [InlineData(new byte[] { 0xF0, 0x80, 0xA0, 0x80 })] + + // Code point larger than what is allowed + [InlineData(new byte[] { 0xF5, 0x80, 0x80, 0x80 })] public void InvalidSingleFramePayloads(byte[] payload) { var validator = new Utf8Validator(); Assert.False(validator.ValidateUtf8Frame(ReadableBuffer.Create(payload), fin: true)); } + [Theory] + + [InlineData(new byte[] { 0xC0 })] // overlong encoding of ASCII + [InlineData(new byte[] { 0xC1 })] // overlong encoding of ASCII + [InlineData(new byte[] { 0xF5 })] // larger than the unicode limit + public void InvalidMultiByteSequencesByFirstByte(byte[] payload) + { + var validator = new Utf8Validator(); + Assert.False(validator.ValidateUtf8Frame(ReadableBuffer.Create(payload), fin: false)); + } + + [Theory] // Continuation byte as first byte of code point @@ -113,8 +128,7 @@ namespace Microsoft.Extensions.WebSockets.Internal.Tests // Overlong Encoding - // 'H' (1 byte char) encoded with 2, 3 and 4 bytes - [InlineData(new byte[] { 0xC1 }, new byte[] { 0x88 })] + // 'H' (1 byte char) encoded with 3 and 4 bytes [InlineData(new byte[] { 0xE0 }, new byte[] { 0x81, 0x88 })] [InlineData(new byte[] { 0xF0 }, new byte[] { 0x80, 0x81, 0x88 })] diff --git a/test/Microsoft.Extensions.WebSockets.Internal.Tests/WebSocketConnectionTests.Utf8Validation.cs b/test/Microsoft.Extensions.WebSockets.Internal.Tests/WebSocketConnectionTests.Utf8Validation.cs index 075f40e116..dfbea02e2d 100644 --- a/test/Microsoft.Extensions.WebSockets.Internal.Tests/WebSocketConnectionTests.Utf8Validation.cs +++ b/test/Microsoft.Extensions.WebSockets.Internal.Tests/WebSocketConnectionTests.Utf8Validation.cs @@ -175,8 +175,7 @@ namespace Microsoft.Extensions.WebSockets.Internal.Tests // Overlong Encoding - // 'H' (1 byte char) encoded with 2, 3 and 4 bytes - [InlineData(new byte[] { 0xC1 }, new byte[] { 0x88 })] + // 'H' (1 byte char) encoded with 3 and 4 bytes [InlineData(new byte[] { 0xE0 }, new byte[] { 0x81, 0x88 })] [InlineData(new byte[] { 0xF0 }, new byte[] { 0x80, 0x81, 0x88 })]