#13 - Utf8 validation.

This commit is contained in:
Chris Ross 2014-10-22 15:55:26 -07:00
parent ade78b1aba
commit 4a5dbccb46
3 changed files with 143 additions and 0 deletions

View File

@ -40,6 +40,7 @@ namespace Microsoft.AspNet.WebSockets.Protocol
private long _frameBytesRemaining;
private int? _firstDataOpCode;
private int _dataUnmaskOffset;
private Utilities.Utf8MessageState _incomingUtf8MessageState = new Utilities.Utf8MessageState();
public CommonWebSocket(Stream stream, string subProtocol, TimeSpan keepAliveInterval, int receiveBufferSize, bool maskOutput, bool useZeroMask, bool unmaskInput)
{
@ -251,6 +252,14 @@ namespace Microsoft.AspNet.WebSockets.Protocol
WebSocketReceiveResult result;
WebSocketMessageType messageType = Utilities.GetMessageType(opCode);
if (messageType == WebSocketMessageType.Text
&& !Utilities.TryValidateUtf8(new ArraySegment<byte>(buffer.Array, buffer.Offset, bytesToCopy), _frameInProgress.Fin, _incomingUtf8MessageState))
{
await CloseOutputAsync(WebSocketCloseStatus.InvalidPayloadData, string.Empty, cancellationToken);
throw new InvalidOperationException("An invalid UTF-8 payload was received.");
}
if (bytesToCopy == _frameBytesRemaining)
{
result = new WebSocketReceiveResult(bytesToCopy, messageType, _frameInProgress.Fin);

View File

@ -69,5 +69,77 @@ namespace Microsoft.AspNet.WebSockets.Protocol
default: throw new NotImplementedException(opCode.ToString());
}
}
/// <summary>
/// Incrementally checks that the bytes of a text message are well-formed UTF-8 as defined by
/// RFC 3629 (the same rules as Table 3-7 of the Unicode Standard).
/// </summary>
/// <remarks>
/// A message may be supplied in several segments and a multi-byte sequence may be split across
/// two segments. Progress through a partially received sequence is kept in
/// <paramref name="state"/>, so the same instance must be passed for every segment of a message.
/// The following are rejected:
/// a continuation byte (0x80-0xBF) where a lead byte is expected;
/// the lead bytes 0xC0 and 0xC1 (they can only start overlong encodings);
/// the lead bytes 0xF5-0xFF (code points above U+10FFFF and the obsolete 5/6 byte forms);
/// overlong 3 and 4 byte encodings (0xE0 0x80-0x9F, 0xF0 0x80-0x8F);
/// UTF-16 surrogates U+D800-U+DFFF (0xED 0xA0-0xBF);
/// code points above U+10FFFF (0xF4 0x90-0xBF);
/// a sequence that is still incomplete when <paramref name="endOfMessage"/> is true.
/// After a false result the contents of <paramref name="state"/> are unspecified and the
/// instance should not be reused.
/// </remarks>
/// <param name="arraySegment">The next bytes of the message payload. May be empty.</param>
/// <param name="endOfMessage">True when this segment is the last one of the message.</param>
/// <param name="state">Carries the position inside a multi-byte sequence between calls. Must not be null.</param>
/// <returns>True if the bytes seen so far are valid; false as soon as a violation is detected.</returns>
public static bool TryValidateUtf8(ArraySegment<byte> arraySegment, bool endOfMessage, Utf8MessageState state)
{
    // Range of an ordinary continuation byte, 10bbbbbb.
    const int ContinuationMin = 0x80;
    const int ContinuationMax = 0xBF;

    byte[] buffer = arraySegment.Array;
    int end = arraySegment.Offset + arraySegment.Count;

    for (int i = arraySegment.Offset; i < end; )
    {
        if (!state.SequenceInProgress)
        {
            byte lead = buffer[i];
            int additional;
            int lower = ContinuationMin;
            int upper = ContinuationMax;

            if (lead <= 0x7F) // 0bbbbbbb, single byte
            {
                additional = 0;
            }
            else if (lead < 0xC2)
            {
                // 0x80-0xBF: misplaced continuation byte, cannot start a sequence.
                // 0xC0, 0xC1: would encode U+0000-U+007F in two bytes (overlong).
                return false;
            }
            else if (lead <= 0xDF) // 110bbbbb 10bbbbbb
            {
                additional = 1;
            }
            else if (lead <= 0xEF) // 1110bbbb 10bbbbbb 10bbbbbb
            {
                additional = 2;
                if (lead == 0xE0)
                {
                    lower = 0xA0; // Anything lower is an overlong encoding of U+0000-U+07FF.
                }
                else if (lead == 0xED)
                {
                    upper = 0x9F; // Anything higher encodes a surrogate, U+D800-U+DFFF.
                }
            }
            else if (lead <= 0xF4) // 11110bbb 10bbbbbb 10bbbbbb 10bbbbbb
            {
                additional = 3;
                if (lead == 0xF0)
                {
                    lower = 0x90; // Anything lower is an overlong encoding of U+0000-U+FFFF.
                }
                else if (lead == 0xF4)
                {
                    upper = 0x8F; // Anything higher is above U+10FFFF.
                }
            }
            else
            {
                // 0xF5-0xF7 exceed U+10FFFF; 0xF8-0xFD are the 5/6 byte forms removed by
                // RFC 3629; 0xFE and 0xFF never appear in UTF-8.
                return false;
            }

            state.SequenceInProgress = true;
            state.AdditionalBytesExpected = additional;
            state.NextByteLowerBound = lower;
            state.NextByteUpperBound = upper;
            i++;
        }

        while (state.AdditionalBytesExpected > 0 && i < end)
        {
            byte b = buffer[i];
            if (b < state.NextByteLowerBound || b > state.NextByteUpperBound)
            {
                return false;
            }

            state.AdditionalBytesExpected--;
            // Only the byte right after the lead byte has a restricted range.
            state.NextByteLowerBound = ContinuationMin;
            state.NextByteUpperBound = ContinuationMax;
            i++;
        }

        if (state.AdditionalBytesExpected == 0)
        {
            state.SequenceInProgress = false;
        }
    }

    // A message must not end in the middle of a multi-byte sequence.
    if (endOfMessage && state.SequenceInProgress)
    {
        return false;
    }

    return true;
}

/// <summary>
/// Mutable validation state that lets <see cref="TryValidateUtf8"/> resume a multi-byte
/// sequence that was split across two segments of the same message.
/// </summary>
public class Utf8MessageState
{
    /// <summary>Creates a state positioned between characters, i.e. at the start of a message.</summary>
    public Utf8MessageState()
    {
        // Default to the ordinary continuation byte range, 0x80-0xBF.
        NextByteLowerBound = 0x80;
        NextByteUpperBound = 0xBF;
    }

    /// <summary>True while a lead byte has been consumed and its sequence is not yet complete.</summary>
    public bool SequenceInProgress { get; set; }

    /// <summary>Number of continuation bytes still required to complete the current sequence.</summary>
    public int AdditionalBytesExpected { get; set; }

    /// <summary>Smallest value accepted for the next continuation byte (inclusive).</summary>
    public int NextByteLowerBound { get; set; }

    /// <summary>Largest value accepted for the next continuation byte (inclusive).</summary>
    public int NextByteUpperBound { get; set; }
}
}
}

View File

@ -0,0 +1,62 @@
// Copyright (c) Microsoft Open Technologies, Inc. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
using System;
using System.Text;
using Xunit;
namespace Microsoft.AspNet.WebSockets.Protocol.Test
{
public class Utf8ValidationTests
{
    // Feeds one whole-array segment to the validator, threading the caller's running state through.
    private static bool Validate(byte[] bytes, bool isFinalSegment, Utilities.Utf8MessageState tracker)
    {
        var segment = new ArraySegment<byte>(bytes);
        return Utilities.TryValidateUtf8(segment, isFinalSegment, tracker);
    }

    // A complete, well-formed message delivered as one segment is accepted.
    [Theory]
    [InlineData(new byte[] { })]
    [InlineData(new byte[] { 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x57, 0x6F, 0x72, 0x6C, 0x64 })] // Hello World
    [InlineData(new byte[] { 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x2D, 0xC2, 0xB5, 0x40, 0xC3, 0x9F, 0xC3, 0xB6, 0xC3, 0xA4, 0xC3, 0xBC, 0xC3, 0xA0, 0xC3, 0xA1 })] // "Hello-µ@ßöäüàá";
    public void ValidateSingleValidSegments_Valid(byte[] data)
    {
        var tracker = new Utilities.Utf8MessageState();

        bool accepted = Validate(data, true, tracker);

        Assert.True(accepted);
    }

    // A well-formed message delivered as three segments sharing one state object is accepted,
    // including the case where a two-byte sequence straddles a segment boundary.
    [Theory]
    [InlineData(new byte[] { }, new byte[] { }, new byte[] { })]
    [InlineData(new byte[] { 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20 }, new byte[] { }, new byte[] { 0x57, 0x6F, 0x72, 0x6C, 0x64 })] // Hello ,, World
    [InlineData(new byte[] { 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x2D, 0xC2, }, new byte[] { 0xB5, 0x40, 0xC3, 0x9F, 0xC3, 0xB6, 0xC3, 0xA4, }, new byte[] { 0xC3, 0xBC, 0xC3, 0xA0, 0xC3, 0xA1 })] // "Hello-µ@ßöäüàá";
    public void ValidateMultipleValidSegments_Valid(byte[] data1, byte[] data2, byte[] data3)
    {
        var tracker = new Utilities.Utf8MessageState();

        Assert.True(Validate(data1, false, tracker));
        Assert.True(Validate(data2, false, tracker));
        Assert.True(Validate(data3, true, tracker));
    }

    // A single-segment message containing bytes that can never occur in UTF-8 is rejected.
    // The commented-out rows are disabled cases: overlong encodings, the 5/6 byte forms, a code
    // point above U+10FFFF and a surrogate. Enable them once TryValidateUtf8 is expected to
    // reject such input.
    [Theory]
    [InlineData(new byte[] { 0xfe })]
    [InlineData(new byte[] { 0xff })]
    [InlineData(new byte[] { 0xfe, 0xfe, 0xff, 0xff })]
    // [InlineData(new byte[] { 0xc0, 0xaf })]
    // [InlineData(new byte[] { 0xe0, 0x80, 0xaf })]
    // [InlineData(new byte[] { 0xf4, 0x90, 0x80, 0x80 })]
    // [InlineData(new byte[] { 0xf0, 0x80, 0x80, 0xaf })]
    // [InlineData(new byte[] { 0xf8, 0x80, 0x80, 0x80, 0xaf })]
    // [InlineData(new byte[] { 0xfc, 0x80, 0x80, 0x80, 0x80, 0xaf })]
    // [InlineData(new byte[] { 0xc1, 0xbf })]
    // [InlineData(new byte[] { 0xed, 0xa0, 0x80, 0x65, 0x64, 0x69, 0x74, 0x65, 0x64 })] // 0xEDA080 decodes to 0xD800, which is a reserved high surrogate character.
    public void ValidateSingleInvalidSegment_Invalid(byte[] data)
    {
        var tracker = new Utilities.Utf8MessageState();

        bool accepted = Validate(data, true, tracker);

        Assert.False(accepted);
    }

    // Disabled: multi-segment scenario in which each segment carries its own expected verdict.
    /*
    [Theory]
    // [InlineData(true, new byte[] { 0xce, 0xba, 0xe1, 0xbd, 0xb9, 0xcf, 0x83, 0xce, 0xbc, 0xce, 0xb5, 0xf4 }, false, new byte[] { 0x90 }, true, new byte[] { })]
    public void ValidateMultipleInvalidSegments_Invalid(bool valid1, byte[] data1, bool valid2, byte[] data2, bool valid3, byte[] data3)
    {
        var state = new Utilities.Utf8MessageState();
        Assert.True(valid1 == Utilities.TryValidateUtf8(new ArraySegment<byte>(data1), endOfMessage: false, state: state), "1st");
        Assert.True(valid2 == Utilities.TryValidateUtf8(new ArraySegment<byte>(data2), endOfMessage: false, state: state), "2nd");
        Assert.True(valid3 == Utilities.TryValidateUtf8(new ArraySegment<byte>(data3), endOfMessage: true, state: state), "3rd");
    }*/
}
}