From 4a5dbccb46acb83cced322b464b0dee82e52c775 Mon Sep 17 00:00:00 2001
From: Chris Ross
Date: Wed, 22 Oct 2014 15:55:26 -0700
Subject: [PATCH] #13 - Utf8 validation.

---
 .../CommonWebSocket.cs                        | 13 +++
 .../Utilities.cs                              | 99 +++++++++++++++++++
 .../Utf8ValidationTests.cs                    | 64 ++++++++++++
 3 files changed, 176 insertions(+)
 create mode 100644 test/Microsoft.AspNet.WebSockets.Protocol.Test/Utf8ValidationTests.cs

diff --git a/src/Microsoft.AspNet.WebSockets.Protocol/CommonWebSocket.cs b/src/Microsoft.AspNet.WebSockets.Protocol/CommonWebSocket.cs
index a5149a128f..191d436157 100644
--- a/src/Microsoft.AspNet.WebSockets.Protocol/CommonWebSocket.cs
+++ b/src/Microsoft.AspNet.WebSockets.Protocol/CommonWebSocket.cs
@@ -40,6 +40,7 @@ namespace Microsoft.AspNet.WebSockets.Protocol
         private long _frameBytesRemaining;
         private int? _firstDataOpCode;
         private int _dataUnmaskOffset;
+        private Utilities.Utf8MessageState _incomingUtf8MessageState = new Utilities.Utf8MessageState();
 
         public CommonWebSocket(Stream stream, string subProtocol, TimeSpan keepAliveInterval, int receiveBufferSize, bool maskOutput, bool useZeroMask, bool unmaskInput)
         {
@@ -251,6 +252,18 @@ namespace Microsoft.AspNet.WebSockets.Protocol
 
             WebSocketReceiveResult result;
             WebSocketMessageType messageType = Utilities.GetMessageType(opCode);
+
+            if (messageType == WebSocketMessageType.Text)
+            {
+                // The message only ends here when this read consumes the remainder of the final frame.
+                bool endOfMessage = _frameInProgress.Fin && bytesToCopy == _frameBytesRemaining;
+                if (!Utilities.TryValidateUtf8(new ArraySegment<byte>(buffer.Array, buffer.Offset, bytesToCopy), endOfMessage, _incomingUtf8MessageState))
+                {
+                    await CloseOutputAsync(WebSocketCloseStatus.InvalidPayloadData, string.Empty, cancellationToken);
+                    throw new InvalidOperationException("An invalid UTF-8 payload was received.");
+                }
+            }
+
             if (bytesToCopy == _frameBytesRemaining)
             {
                 result = new WebSocketReceiveResult(bytesToCopy, messageType, _frameInProgress.Fin);
diff --git a/src/Microsoft.AspNet.WebSockets.Protocol/Utilities.cs b/src/Microsoft.AspNet.WebSockets.Protocol/Utilities.cs
index a48a5a184b..a5a27c8299 100644
--- a/src/Microsoft.AspNet.WebSockets.Protocol/Utilities.cs
+++ b/src/Microsoft.AspNet.WebSockets.Protocol/Utilities.cs
@@ -69,5 +69,104 @@ namespace Microsoft.AspNet.WebSockets.Protocol
                 default: throw new NotImplementedException(opCode.ToString());
             }
         }
+
+        /// <summary>
+        /// Incrementally checks that the bytes of a text message are well-formed UTF-8 as defined by RFC 3629.
+        /// </summary>
+        /// <param name="arraySegment">The next run of payload bytes belonging to the current message.</param>
+        /// <param name="endOfMessage">True when <paramref name="arraySegment"/> holds the final bytes of the message.</param>
+        /// <param name="state">Carries a partially received character between calls. Pass the same instance for every
+        /// segment of a message and do not reuse it once this method has returned false.</param>
+        /// <returns>False if the bytes seen so far cannot belong to a valid UTF-8 message; otherwise true.</returns>
+        public static bool TryValidateUtf8(ArraySegment<byte> arraySegment, bool endOfMessage, Utf8MessageState state)
+        {
+            int end = arraySegment.Offset + arraySegment.Count;
+            for (int i = arraySegment.Offset; i < end; )
+            {
+                if (!state.SequenceInProgress)
+                {
+                    // The lead byte fixes the sequence length and the smallest code point that length may encode.
+                    state.SequenceInProgress = true;
+                    byte b = arraySegment.Array[i];
+                    i++;
+                    if ((b & 0x80) == 0) // 0bbbbbbb, single byte
+                    {
+                        state.AdditionalBytesExpected = 0;
+                        state.CurrentDecodeBits = b;
+                        state.ExpectedValueMin = 0;
+                    }
+                    else if ((b & 0xC0) == 0x80)
+                    {
+                        return false; // Misplaced 10bbbbbb byte. This cannot be the first byte.
+                    }
+                    else if ((b & 0xE0) == 0xC0) // 110bbbbb 10bbbbbb
+                    {
+                        state.AdditionalBytesExpected = 1;
+                        state.CurrentDecodeBits = b & 0x1F;
+                        state.ExpectedValueMin = 0x80;
+                    }
+                    else if ((b & 0xF0) == 0xE0) // 1110bbbb 10bbbbbb 10bbbbbb
+                    {
+                        state.AdditionalBytesExpected = 2;
+                        state.CurrentDecodeBits = b & 0x0F;
+                        state.ExpectedValueMin = 0x800;
+                    }
+                    else if ((b & 0xF8) == 0xF0) // 11110bbb 10bbbbbb 10bbbbbb 10bbbbbb
+                    {
+                        state.AdditionalBytesExpected = 3;
+                        state.CurrentDecodeBits = b & 0x07;
+                        state.ExpectedValueMin = 0x10000;
+                    }
+                    else // 111110bb and 1111110b (5 and 6 byte forms), 11111110 and 11111111 are not valid
+                    {
+                        return false;
+                    }
+                }
+                while (state.AdditionalBytesExpected > 0 && i < end)
+                {
+                    byte b = arraySegment.Array[i];
+                    if ((b & 0xC0) != 0x80)
+                    {
+                        return false; // Every trailing byte must look like 10bbbbbb.
+                    }
+                    i++;
+                    state.AdditionalBytesExpected--;
+                    state.CurrentDecodeBits = (state.CurrentDecodeBits << 6) | (b & 0x3F);
+
+                    if (state.AdditionalBytesExpected == 1 && state.CurrentDecodeBits >= 0x360 && state.CurrentDecodeBits <= 0x37F)
+                    {
+                        return false; // Would decode into U+D800..U+DFFF, the UTF-16 surrogates.
+                    }
+                    if (state.AdditionalBytesExpected == 2 && state.CurrentDecodeBits >= 0x110)
+                    {
+                        return false; // Would decode beyond U+10FFFF.
+                    }
+                }
+                if (state.AdditionalBytesExpected == 0)
+                {
+                    state.SequenceInProgress = false;
+                    if (state.CurrentDecodeBits < state.ExpectedValueMin)
+                    {
+                        return false; // Overlong form: more bytes were used than the code point needs.
+                    }
+                }
+            }
+            if (endOfMessage && state.SequenceInProgress)
+            {
+                return false; // The message ended in the middle of a character.
+            }
+            return true;
+        }
+
+        /// <summary>
+        /// Decoder state that <see cref="TryValidateUtf8"/> carries from one segment of a message to the next.
+        /// </summary>
+        public class Utf8MessageState
+        {
+            public bool SequenceInProgress { get; set; }
+            public int AdditionalBytesExpected { get; set; }
+            public int CurrentDecodeBits { get; set; }
+            public int ExpectedValueMin { get; set; }
+        }
     }
 }
diff --git a/test/Microsoft.AspNet.WebSockets.Protocol.Test/Utf8ValidationTests.cs b/test/Microsoft.AspNet.WebSockets.Protocol.Test/Utf8ValidationTests.cs
new file mode 100644
index 0000000000..a34fa2616d
--- /dev/null
+++ b/test/Microsoft.AspNet.WebSockets.Protocol.Test/Utf8ValidationTests.cs
@@ -0,0 +1,64 @@
+// Copyright (c) Microsoft Open Technologies, Inc. All rights reserved.
+// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
+
+using System;
+using System.Text;
+using Xunit;
+
+namespace Microsoft.AspNet.WebSockets.Protocol.Test
+{
+    public class Utf8ValidationTests
+    {
+        [Theory]
+        [InlineData(new byte[] { })]
+        [InlineData(new byte[] { 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x57, 0x6F, 0x72, 0x6C, 0x64 })] // Hello World
+        [InlineData(new byte[] { 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x2D, 0xC2, 0xB5, 0x40, 0xC3, 0x9F, 0xC3, 0xB6, 0xC3, 0xA4, 0xC3, 0xBC, 0xC3, 0xA0, 0xC3, 0xA1 })] // "Hello-µ@ßöäüàá";
+        [InlineData(new byte[] { 0xED, 0x9F, 0xBF, 0xEE, 0x80, 0x80, 0xF4, 0x8F, 0xBF, 0xBF })] // U+D7FF, U+E000, U+10FFFF: the code points adjacent to the forbidden ranges
+        public void ValidateSingleValidSegments_Valid(byte[] data)
+        {
+            var state = new Utilities.Utf8MessageState();
+            Assert.True(Utilities.TryValidateUtf8(new ArraySegment<byte>(data), endOfMessage: true, state: state));
+        }
+
+        [Theory]
+        [InlineData(new byte[] { }, new byte[] { }, new byte[] { })]
+        [InlineData(new byte[] { 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20 }, new byte[] { }, new byte[] { 0x57, 0x6F, 0x72, 0x6C, 0x64 })] // Hello ,, World
+        [InlineData(new byte[] { 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x2D, 0xC2, }, new byte[] { 0xB5, 0x40, 0xC3, 0x9F, 0xC3, 0xB6, 0xC3, 0xA4, }, new byte[] { 0xC3, 0xBC, 0xC3, 0xA0, 0xC3, 0xA1 })] // "Hello-µ@ßöäüàá";
+        public void ValidateMultipleValidSegments_Valid(byte[] data1, byte[] data2, byte[] data3)
+        {
+            var state = new Utilities.Utf8MessageState();
+            Assert.True(Utilities.TryValidateUtf8(new ArraySegment<byte>(data1), endOfMessage: false, state: state));
+            Assert.True(Utilities.TryValidateUtf8(new ArraySegment<byte>(data2), endOfMessage: false, state: state));
+            Assert.True(Utilities.TryValidateUtf8(new ArraySegment<byte>(data3), endOfMessage: true, state: state));
+        }
+
+        [Theory]
+        [InlineData(new byte[] { 0xfe })]
+        [InlineData(new byte[] { 0xff })]
+        [InlineData(new byte[] { 0xfe, 0xfe, 0xff, 0xff })]
+        [InlineData(new byte[] { 0xc0, 0xaf })] // Overlong 2 byte form.
+        [InlineData(new byte[] { 0xe0, 0x80, 0xaf })] // Overlong 3 byte form.
+        [InlineData(new byte[] { 0xf4, 0x90, 0x80, 0x80 })] // Above U+10FFFF.
+        [InlineData(new byte[] { 0xf0, 0x80, 0x80, 0xaf })] // Overlong 4 byte form.
+        [InlineData(new byte[] { 0xf8, 0x80, 0x80, 0x80, 0xaf })] // 5 byte form.
+        [InlineData(new byte[] { 0xfc, 0x80, 0x80, 0x80, 0x80, 0xaf })] // 6 byte form.
+        [InlineData(new byte[] { 0xc1, 0xbf })] // Overlong 2 byte form.
+        [InlineData(new byte[] { 0xed, 0xa0, 0x80, 0x65, 0x64, 0x69, 0x74, 0x65, 0x64 })] // 0xEDA080 decodes to 0xD800, which is a reserved high surrogate character.
+        public void ValidateSingleInvalidSegment_Invalid(byte[] data)
+        {
+            var state = new Utilities.Utf8MessageState();
+            Assert.False(Utilities.TryValidateUtf8(new ArraySegment<byte>(data), endOfMessage: true, state: state));
+        }
+
+        [Theory]
+        [InlineData(true, new byte[] { 0xce, 0xba, 0xe1, 0xbd, 0xb9, 0xcf, 0x83, 0xce, 0xbc, 0xce, 0xb5, 0xf4 }, false, new byte[] { 0x90 })] // Above U+10FFFF, split after the lead byte.
+        [InlineData(true, new byte[] { 0xe0 }, false, new byte[] { 0x80, 0xaf })] // Overlong 3 byte form, split after the lead byte.
+        [InlineData(true, new byte[] { 0xed }, false, new byte[] { 0xa0, 0x80 })] // Surrogate, split after the lead byte.
+        public void ValidateMultipleInvalidSegments_Invalid(bool valid1, byte[] data1, bool valid2, byte[] data2)
+        {
+            var state = new Utilities.Utf8MessageState();
+            Assert.True(valid1 == Utilities.TryValidateUtf8(new ArraySegment<byte>(data1), endOfMessage: false, state: state), "1st");
+            Assert.True(valid2 == Utilities.TryValidateUtf8(new ArraySegment<byte>(data2), endOfMessage: true, state: state), "2nd");
+        }
+    }
+}
\ No newline at end of file