diff --git a/src/Microsoft.AspNetCore.Mvc.Core/Formatters/TextInputFormatter.cs b/src/Microsoft.AspNetCore.Mvc.Core/Formatters/TextInputFormatter.cs
index 8967511c00..761b780eb1 100644
--- a/src/Microsoft.AspNetCore.Mvc.Core/Formatters/TextInputFormatter.cs
+++ b/src/Microsoft.AspNetCore.Mvc.Core/Formatters/TextInputFormatter.cs
@@ -7,6 +7,7 @@ using System.Text;
 using System.Threading.Tasks;
 using Microsoft.AspNetCore.Mvc.Core;
 using Microsoft.AspNetCore.Mvc.ModelBinding;
+using Microsoft.Extensions.Primitives;
 
 namespace Microsoft.AspNetCore.Mvc.Formatters
 {
@@ -15,6 +16,50 @@ namespace Microsoft.AspNetCore.Mvc.Formatters
     /// </summary>
     public abstract class TextInputFormatter : InputFormatter
     {
+        // ASP.NET Core MVC 1.0 used Encoding.GetEncoding() when comparing the request charset with SupportedEncodings.
+        // That method supports the following alternate names for system-supported encodings. This table maps from
+        // .NET Core's keys in s_encodingDataTable to values in s_encodingDataTableItems, less identity mappings.
+        // Charset names are case-insensitive, so lookups ignore case. Keep in sync with the Unix-specific tables in
+        // https://github.com/dotnet/coreclr/blob/master/src/mscorlib/src/System/Globalization/EncodingTable.Unix.cs
+        // or their Windows equivalents (also for .NET Core not desktop .NET): COMNlsInfo::EncodingDataTable and
+        // COMNlsInfo::CodePageDataTable in EncodingData in
+        // https://github.com/dotnet/coreclr/blob/master/src/classlibnative/nls/encodingdata.cpp
+        private static readonly IReadOnlyDictionary<string, string> EncodingAliases = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase)
+        {
+            { "ANSI_X3.4-1968", "us-ascii" },
+            { "ANSI_X3.4-1986", "us-ascii" },
+            { "ascii", "us-ascii" },
+            { "cp367", "us-ascii" },
+            { "cp819", "iso-8859-1" },
+            { "csASCII", "us-ascii" },
+            { "csISOLatin1", "iso-8859-1" },
+            { "csUnicode11UTF7", "utf-7" },
+            { "IBM367", "us-ascii" },
+            { "ibm819", "iso-8859-1" },
+            { "ISO-10646-UCS-2", "utf-16" },
+            { "iso-ir-100", "iso-8859-1" },
+            { "iso-ir-6", "us-ascii" },
+            { "ISO646-US", "us-ascii" },
+            { "ISO_646.irv:1991", "us-ascii" },
+            { "iso_8859-1:1987", "iso-8859-1" },
+            { "l1", "iso-8859-1" },
+            { "latin1", "iso-8859-1" },
+            { "ucs-2", "utf-16" },
+            { "unicode", "utf-16" },
+            { "unicode-1-1-utf-7", "utf-7" },
+            { "unicode-1-1-utf-8", "utf-8" },
+            { "unicode-2-0-utf-7", "utf-7" },
+            { "unicode-2-0-utf-8", "utf-8" },
+            { "unicodeFFFE", "utf-16BE" },
+            { "us", "us-ascii" },
+            { "UTF-16LE", "utf-16" },
+            { "UTF-32LE", "utf-32" },
+            { "x-unicode-1-1-utf-7", "utf-7" },
+            { "x-unicode-1-1-utf-8", "utf-8" },
+            { "x-unicode-2-0-utf-7", "utf-7" },
+            { "x-unicode-2-0-utf-8", "utf-8" },
+        };
+
         /// <summary>
         /// Returns UTF8 Encoding without BOM and throws on invalid bytes.
         /// </summary>
@@ -64,7 +109,9 @@ namespace Microsoft.AspNetCore.Mvc.Formatters
         /// <param name="context">The <see cref="InputFormatterContext"/>.</param>
         /// <param name="encoding">The <see cref="Encoding"/> used to read the request body.</param>
         /// <returns>A <see cref="Task"/> that on completion deserializes the request body.</returns>
-        public abstract Task<InputFormatterResult> ReadRequestBodyAsync(InputFormatterContext context, Encoding encoding);
+        public abstract Task<InputFormatterResult> ReadRequestBodyAsync(
+            InputFormatterContext context,
+            Encoding encoding);
 
         /// <summary>
         /// Returns an <see cref="Encoding"/> based on <paramref name="context"/>'s
@@ -82,23 +129,30 @@ namespace Microsoft.AspNetCore.Mvc.Formatters
                 throw new ArgumentNullException(nameof(context));
             }
 
-            if (SupportedEncodings?.Count == 0)
+            if (SupportedEncodings.Count == 0)
             {
-                var message = Resources.FormatTextInputFormatter_SupportedEncodingsMustNotBeEmpty(nameof(SupportedEncodings));
+                var message = Resources.FormatTextInputFormatter_SupportedEncodingsMustNotBeEmpty(
+                    nameof(SupportedEncodings));
                 throw new InvalidOperationException(message);
             }
 
-            var request = context.HttpContext.Request;
-
-            var requestEncoding = request.ContentType == null ? null : MediaType.GetEncoding(request.ContentType);
-            if (requestEncoding != null)
+            var requestContentType = context.HttpContext.Request.ContentType;
+            var requestEncoding = requestContentType == null ?
+                default(StringSegment) :
+                new MediaType(requestContentType).Charset;
+            if (requestEncoding.HasValue)
             {
+                var encodingName = requestEncoding.Value;
+                string alias;
+                if (EncodingAliases.TryGetValue(encodingName, out alias))
+                {
+                    // Given name was an encoding alias. Use the preferred name.
+                    encodingName = alias;
+                }
+
                 for (int i = 0; i < SupportedEncodings.Count; i++)
                 {
-                    if (string.Equals(
-                        requestEncoding.WebName,
-                        SupportedEncodings[i].WebName,
-                        StringComparison.OrdinalIgnoreCase))
+                    if (string.Equals(encodingName, SupportedEncodings[i].WebName, StringComparison.OrdinalIgnoreCase))
                     {
                         return SupportedEncodings[i];
                     }
@@ -116,7 +170,7 @@ namespace Microsoft.AspNetCore.Mvc.Formatters
             // cases where the client doesn't send a content type header or sends a content
             // type header without encoding. For that reason we pick the first encoding of the
             // list of supported encodings and try to use that to read the body. This encoding
-            // is UTF-8 by default on our formatters, which generally is a safe choice for the
+            // is UTF-8 by default in our formatters, which generally is a safe choice for the
             // encoding.
             return SupportedEncodings[0];
         }
diff --git a/test/Microsoft.AspNetCore.Mvc.Core.Test/Formatters/TextInputFormatterTest.cs b/test/Microsoft.AspNetCore.Mvc.Core.Test/Formatters/TextInputFormatterTest.cs
index c44135c856..f781cdf303 100644
--- a/test/Microsoft.AspNetCore.Mvc.Core.Test/Formatters/TextInputFormatterTest.cs
+++ b/test/Microsoft.AspNetCore.Mvc.Core.Test/Formatters/TextInputFormatterTest.cs
@@ -62,8 +62,10 @@ namespace Microsoft.AspNetCore.Mvc.Formatters
             Assert.Throws<InvalidOperationException>(() => formatter.TestSelectCharacterEncoding(context));
         }
 
-        [Fact]
-        public void SelectCharacterEncoding_ReturnsNull_IfItCanNotUnderstandContentTypeEncoding()
+        [Theory]
+        [InlineData("utf-8")]
+        [InlineData("invalid")]
+        public void SelectCharacterEncoding_ReturnsNull_IfItCanNotUnderstandContentTypeEncoding(string charset)
         {
             // Arrange
             var formatter = new TestFormatter();
@@ -76,7 +78,7 @@ namespace Microsoft.AspNetCore.Mvc.Formatters
                 new EmptyModelMetadataProvider().GetMetadataForType(typeof(object)),
                 (stream, encoding) => new StreamReader(stream, encoding));
 
-            context.HttpContext.Request.ContentType = "application/json;charset=utf-8";
+            context.HttpContext.Request.ContentType = "application/json;charset=" + charset;
 
             // Act
             var result = formatter.TestSelectCharacterEncoding(context);
@@ -110,9 +112,80 @@ namespace Microsoft.AspNetCore.Mvc.Formatters
         }
 
         [Theory]
-        [InlineData("application/json")]
+        // Each value is a distinct alias that TextInputFormatter's alias table maps to "utf-8".
+        [InlineData("unicode-1-1-utf-8")]
+        [InlineData("unicode-2-0-utf-8")]
+        [InlineData("x-unicode-1-1-utf-8")]
+        [InlineData("x-unicode-2-0-utf-8")]
+        public void SelectCharacterEncoding_ReturnsUTF8Encoding_IfContentTypeIsAnAlias(string charset)
+        {
+            // Arrange
+            var formatter = new TestFormatter();
+            formatter.SupportedEncodings.Add(Encoding.UTF32);
+            formatter.SupportedEncodings.Add(Encoding.UTF8);
+
+            var context = new InputFormatterContext(
+                new DefaultHttpContext(),
+                "something",
+                new ModelStateDictionary(),
+                new EmptyModelMetadataProvider().GetMetadataForType(typeof(object)),
+                (stream, encoding) => new StreamReader(stream, encoding));
+
+            context.HttpContext.Request.ContentType = "application/json;charset=" + charset;
+
+            // Act
+            var result = formatter.TestSelectCharacterEncoding(context);
+
+            // Assert
+            Assert.Equal(Encoding.UTF8, result);
+        }
+
+        [Theory]
+        [InlineData("ANSI_X3.4-1968")]
+        [InlineData("ANSI_X3.4-1986")]
+        [InlineData("ascii")]
+        [InlineData("cp367")]
+        [InlineData("csASCII")]
+        [InlineData("IBM367")]
+        [InlineData("iso-ir-6")]
+        [InlineData("ISO646-US")]
+        [InlineData("ISO_646.irv:1991")]
+        [InlineData("us")]
+        public void SelectCharacterEncoding_ReturnsAsciiEncoding_IfContentTypeIsAnAlias(string charset)
+        {
+            // Arrange
+            var formatter = new TestFormatter();
+            formatter.SupportedEncodings.Add(Encoding.UTF32);
+            formatter.SupportedEncodings.Add(Encoding.ASCII);
+
+            var context = new InputFormatterContext(
+                new DefaultHttpContext(),
+                "something",
+                new ModelStateDictionary(),
+                new EmptyModelMetadataProvider().GetMetadataForType(typeof(object)),
+                (stream, encoding) => new StreamReader(stream, encoding));
+
+            context.HttpContext.Request.ContentType = "application/json;charset=\"" + charset + "\"";
+
+            // Act
+            var result = formatter.TestSelectCharacterEncoding(context);
+
+            // Assert
+            Assert.Equal(Encoding.ASCII, result);
+        }
+
+        [Theory]
         [InlineData("")]
-        public void SelectCharacterEncoding_ReturnsFirstEncoding_IfContentTypeIsNotSpecifiedOrDoesNotHaveEncoding(string contentType)
+        [InlineData("(garbage)")]
+        [InlineData("(garbage); charset=utf-32")]
+        [InlineData("text/(garbage)")]
+        [InlineData("text/(garbage); charset=utf-32")]
+        [InlineData("application/json")]
+        [InlineData("application/json; charset")]
+        [InlineData("application/json; charset=(garbage)")]
+        [InlineData("application/json; version=(garbage); charset=utf-32")]
+        public void SelectCharacterEncoding_ReturnsFirstEncoding_IfContentTypeIsMissingInvalidOrDoesNotHaveEncoding(
+            string contentType)
         {
             // Arrange
             var formatter = new TestFormatter();