From ff78034a1c050e21cbcf8e9b5da4f8f51161afbc Mon Sep 17 00:00:00 2001 From: fanoI Date: Sun, 7 Jan 2018 21:25:07 +0100 Subject: [PATCH] Added CP437 and CP858 (DOS Latin1) support to Encoding, CP858 is not working for VMT bugs. --- .../System/EncodingTest.cs | 289 ++++++++++++++++-- source/Cosmos.Core_Plugs/System/StringImpl.cs | 14 +- source/Cosmos.System2/Text/CP437Encoding.cs | 38 +++ source/Cosmos.System2/Text/CP858Encoding.cs | 31 ++ .../Text/CosmosEncodingProvider.cs | 39 +++ source/Cosmos.System2/Text/EncodingTable.cs | 60 ++++ .../Cosmos.System2/Text/SingleByteEncoding.cs | 187 ++++++++++++ .../System/Globalization/EncodingTableImpl.cs | 31 ++ 8 files changed, 664 insertions(+), 25 deletions(-) create mode 100644 source/Cosmos.System2/Text/CP437Encoding.cs create mode 100644 source/Cosmos.System2/Text/CP858Encoding.cs create mode 100644 source/Cosmos.System2/Text/CosmosEncodingProvider.cs create mode 100644 source/Cosmos.System2/Text/EncodingTable.cs create mode 100644 source/Cosmos.System2/Text/SingleByteEncoding.cs create mode 100644 source/Cosmos.System2_Plugs/System/Globalization/EncodingTableImpl.cs diff --git a/Tests/Cosmos.Compiler.Tests.Bcl/System/EncodingTest.cs b/Tests/Cosmos.Compiler.Tests.Bcl/System/EncodingTest.cs index 5d1c1ae0a..24179de32 100644 --- a/Tests/Cosmos.Compiler.Tests.Bcl/System/EncodingTest.cs +++ b/Tests/Cosmos.Compiler.Tests.Bcl/System/EncodingTest.cs @@ -1,13 +1,18 @@ -using System; +#define COSMOSDEBUG +using System; using System.Collections.Generic; using System.Text; using Cosmos.TestRunner; using Cosmos.Compiler.Tests.Bcl.Helper; +using Cosmos.Debug.Kernel; +using Cosmos.System.ExtendedASCII; namespace Cosmos.Compiler.Tests.Bcl.System { class EncodingTest { + static Debugger mDebugger = new Debugger("System", "Enconding Test"); + static byte[] UTF8EnglishText = new byte[] { 0x43, 0x6F, 0x73, 0x6D, 0x6F, 0x73, 0x20, 0x69, 0x73, 0x20, 0x77, 0x6F, 0x6E, 0x64, 0x65, 0x72, 0x66, 0x75, 0x6C, 0x21 }; static byte[] UTF8ItalianText 
= new byte[] { 0x43, 0x6F, 0x73, 0x6D, 0x6F, 0x73, 0x20, 0xC3, 0xA8, 0x20, @@ -26,90 +31,326 @@ namespace Cosmos.Compiler.Tests.Bcl.System 0xE6, 0x99, 0xB4, 0xE3, 0x82, 0x89, 0xE3, 0x81, 0x97, 0xE3, 0x81, 0x84, 0xE3, 0x81, 0xA7, 0xE3, 0x81, 0x99, 0x21 }; static byte[] UTF8GothicText = new byte[] { 0xF0, 0x90, 0x8D, 0x88 }; + static byte[] CP437EnglishText = new byte[] { 0x43, 0x6F, 0x73, 0x6D, 0x6F, 0x73, 0x20, 0x69, 0x73, 0x20, + 0x77, 0x6F, 0x6E, 0x64, 0x65, 0x72, 0x66, 0x75, 0x6C, 0x21 }; + static byte[] CP437ItalianText = new byte[] { 0x43, 0x6F, 0x73, 0x6D, 0x6F, 0x73, 0x20, 0x8A, 0x20, 0x66, + 0x61, 0x6E, 0x74, 0x61, 0x73, 0x74, 0x69, 0x63, 0x6F, 0x21 }; + + static byte[] CP437SpanishText = new byte[] { 0x43, 0x6F, 0x73, 0x6D, 0x6F, 0x73, 0x20, 0x65, 0x73, 0x20, + 0x67, 0x65, 0x6E, 0x69, 0x61, 0x6C, 0x21 }; + + static byte[] CP437GermanicText = new byte[] { 0x43, 0x6F, 0x73, 0x6D, 0x6F, 0x73, 0x20, 0x69, 0x73, 0x74, 0x20, + 0x67, 0x72, 0x6F, 0xE1, 0x61, 0x72, 0x74, 0x69, 0x67, 0x21 }; + static byte[] CP437GreekText = new byte[] { 0x43, 0x6F, 0x73, 0x6D, 0x6F, 0x73, 0x20, 0xEE, 0x3F, 0x3F, 0xE0, + 0x3F, 0x20, 0x3F, 0xE3, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x21 }; + static byte[] CP437JapanaseText = new byte[] { 0x43, 0x6F, 0x73, 0x6D, 0x6F, 0x73, 0x20, 0x3F, 0x3F, 0x3F, 0x3F, + 0x3F, 0x3F, 0x3F, 0x21 }; + static byte[] CP437GothicText = new byte[] { 0x3F, 0x3F }; + static byte[] CP858EnglishText = CP437EnglishText; + static byte[] CP858ItalianText = CP437ItalianText; + static byte[] CP858SpanishText = CP437SpanishText; + static byte[] CP858GermanicText = CP437GermanicText; + /* CP858 has no Greek characters they are all replaced by '?' 
(0x3F) */ + static byte[] CP858GreekText = new byte[] { 0x43, 0x6F, 0x73, 0x6D, 0x6F, 0x73, 0x20, 0x3F, 0x3F, 0x3F, 0x3F, + 0x3F, 0x20, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x21 }; + static byte[] CP858JapanaseText = CP437JapanaseText; + static byte[] CP858GothicText = CP437GothicText; + + static void TestGetBytes(Encoding xEncoding, string text, byte[] expectedResult, string desc) + { + byte[] result; + + result = xEncoding.GetBytes(text); + Assert.IsTrue(EqualityHelper.ByteArrayAreEquals(result, expectedResult), $"{xEncoding.BodyName} Encoding of {desc} text failed byte arrays different"); + } + + + static void TestGetString(Encoding xEncoding, byte[] bytes, string expectedText, string desc) + { + string text; + + text = xEncoding.GetString(bytes); + Assert.IsTrue((text == expectedText), $"{xEncoding.BodyName} Decoding of {desc} text failed strings different"); + } + + static void TestUTF8() + { + Encoding xEncoding = new UTF8Encoding(); + + mDebugger.SendInternal($"Starting Test {xEncoding.BodyName} Encoding / Decoding"); + + TestGetBytes(xEncoding, "Cosmos is wonderful!", UTF8EnglishText, "English"); + TestGetBytes(xEncoding, "Cosmos è fantastico!", UTF8ItalianText, "Italian"); + TestGetBytes(xEncoding, "Cosmos es genial!", UTF8SpanishText, "Spanish"); + TestGetBytes(xEncoding, "Cosmos ist großartig!", UTF8GermanicText, "Germanic"); + TestGetBytes(xEncoding, "Cosmos είναι υπέροχος!", UTF8GreekText, "Greek"); + TestGetBytes(xEncoding, "Cosmos 素晴らしいです!", UTF8JapanaseText, "Japanese"); + TestGetBytes(xEncoding, "𐍈", UTF8GothicText, "Gothic"); + + TestGetString(xEncoding, UTF8EnglishText, "Cosmos is wonderful!", "English"); + TestGetString(xEncoding, UTF8ItalianText, "Cosmos è fantastico!", "Italian"); + TestGetString(xEncoding, UTF8SpanishText, "Cosmos es genial!", "Spanish"); + TestGetString(xEncoding, UTF8GermanicText, "Cosmos ist großartig!", "Germanic"); + /* CP437 replaces not representable characters with '?' 
*/ + TestGetString(xEncoding, UTF8GreekText, "Cosmos είναι υπέροχος!", "Greek"); + TestGetString(xEncoding, UTF8JapanaseText, "Cosmos 素晴らしいです!", "Japanese"); + TestGetString(xEncoding, UTF8GothicText, "𐍈", "Gothic"); + + mDebugger.SendInternal($"Finished Test {xEncoding.BodyName} Encoding / Decoding"); + } + + static void TestCP437() + { + Encoding xEncoding = Encoding.GetEncoding(437); + + mDebugger.SendInternal($"Starting Test {xEncoding.BodyName} Encoding / Decoding"); + + TestGetBytes(xEncoding, "Cosmos is wonderful!", CP437EnglishText, "English"); + TestGetBytes(xEncoding, "Cosmos è fantastico!", CP437ItalianText, "Italian"); + TestGetBytes(xEncoding, "Cosmos es genial!", CP437SpanishText, "Spanish"); + TestGetBytes(xEncoding, "Cosmos ist großartig!", CP437GermanicText, "Germanic"); + /* + * From this point on a lot of characters will be replaced by 0x3F ('?') because + * cannot really represented on CP437 + */ + TestGetBytes(xEncoding, "Cosmos είναι υπέροχος!", CP437GreekText, "Greek"); + TestGetBytes(xEncoding, "Cosmos 素晴らしいです!", CP437JapanaseText, "Japanese"); + TestGetBytes(xEncoding, "𐍈", CP437GothicText, "Gothic"); + + TestGetString(xEncoding, CP437EnglishText, "Cosmos is wonderful!", "English"); + TestGetString(xEncoding, CP437ItalianText, "Cosmos è fantastico!", "Italian"); + TestGetString(xEncoding, CP437SpanishText, "Cosmos es genial!", "Spanish"); + TestGetString(xEncoding, CP437GermanicText, "Cosmos ist großartig!", "Germanic"); + /* CP437 replaces not representable characters with '?' */ + TestGetString(xEncoding, CP437GreekText, "Cosmos ε??α? 
?π??????!", "Greek"); + TestGetString(xEncoding, CP437JapanaseText, "Cosmos ???????!", "Japanese"); + TestGetString(xEncoding, CP437GothicText, "??", "Gothic"); + + mDebugger.SendInternal("Finished Test {xEncoding.BodyName} Encoding / Decoding"); + } + + static void TestCP858() + { + Encoding xEncoding = Encoding.GetEncoding(858); + + mDebugger.SendInternal($"Starting Test {xEncoding.BodyName} Encoding / Decoding"); + + TestGetBytes(xEncoding, "Cosmos is wonderful!", CP858EnglishText, "English"); + TestGetBytes(xEncoding, "Cosmos è fantastico!", CP858ItalianText, "Italian"); + TestGetBytes(xEncoding, "Cosmos es genial!", CP858SpanishText, "Spanish"); + TestGetBytes(xEncoding, "Cosmos ist großartig!", CP858GermanicText, "Germanic"); + /* + * From this point on a lot of characters will be replaced by 0x3F ('?') because + * cannot really represented on CP858 + */ + TestGetBytes(xEncoding, "Cosmos είναι υπέροχος!", CP858GreekText, "Greek"); + TestGetBytes(xEncoding, "Cosmos 素晴らしいです!", CP858JapanaseText, "Japanese"); + TestGetBytes(xEncoding, "𐍈", CP858GothicText, "Gothic"); + + TestGetString(xEncoding, CP858EnglishText, "Cosmos is wonderful!", "English"); + TestGetString(xEncoding, CP858ItalianText, "Cosmos è fantastico!", "Italian"); + TestGetString(xEncoding, CP858SpanishText, "Cosmos es genial!", "Spanish"); + TestGetString(xEncoding, CP858GermanicText, "Cosmos ist großartig!", "Germanic"); + /* CP858 replaces not representable characters with '?' */ + TestGetString(xEncoding, CP858GreekText, "Cosmos ????? 
????????!", "Greek"); + TestGetString(xEncoding, CP858JapanaseText, "Cosmos ???????!", "Japanese"); + TestGetString(xEncoding, CP858GothicText, "??", "Gothic"); + + mDebugger.SendInternal("Finished Test CP858 Encoding / Decoding"); + } public static void Execute() { - //CosmosUTF8Encoding Encoder = new CosmosUTF8Encoding(); - //Encoder Encoder = new UTF8Encoding().GetEncoder(); - Encoding Encoder = new UTF8Encoding(); + /* + * Net Core has removed all the legacy codepages from Encoding, only Unicode and ASCII are supported + * the correct way to add them is to create an Encoding Provider. + * Microsoft has created a CodePageEncodingProvider for this but it is too much complex to use it in + * Cosmos now, but we should use surely this in future. + * As a replacement for it I have created CosmosEncodingProvider that is more simple (but less efficient). + */ + Encoding.RegisterProvider(CosmosEncodingProvider.Instance); + + //TestUTF8(); + // TestAscii(); + + TestCP437(); + TestCP858(); + } + +#if false + public static void Execute() + { + /* + * Net Core has removed all the legacy codepages from Encoding, only Unicode and ASCII are supported + * the correct way to add them is to create an Encoding Provider. + * Microsoft has created a CodePageEncodingProvider for this but it is too much complex to use it in + * Cosmos now, but we should use surely this in future. + * As a replacement for it I have created CosmosEncodingProvider that is more simple (but less efficient). 
+ */ + Encoding.RegisterProvider(CosmosEncodingProvider.Instance); + + Encoding xEncoding = new UTF8Encoding(); string text; byte[] result; byte[] expectedResult; + Assert.IsTrue(!xEncoding.IsSingleByte, "IsSingleByte failed return true for UTF8"); + +#if true text = "Cosmos is wonderful!"; - result = Encoder.GetBytes(text); + result = xEncoding.GetBytes(text); expectedResult = UTF8EnglishText; Assert.IsTrue(EqualityHelper.ByteArrayAreEquals(result, expectedResult), "UTF8 Encoding of English text failed byte arrays different"); text = "Cosmos è fantastico!"; - result = Encoder.GetBytes(text); + result = xEncoding.GetBytes(text); expectedResult = UTF8ItalianText; Assert.IsTrue(EqualityHelper.ByteArrayAreEquals(result, expectedResult), "UTF8 Encoding of Italian text failed byte arrays different"); text = "Cosmos es genial!"; - result = Encoder.GetBytes(text); + result = xEncoding.GetBytes(text); expectedResult = UTF8SpanishText; Assert.IsTrue(EqualityHelper.ByteArrayAreEquals(result, expectedResult), "UTF8 Encoding of Spanish text failed byte arrays different"); text = "Cosmos ist großartig!"; - result = Encoder.GetBytes(text); + result = xEncoding.GetBytes(text); expectedResult = UTF8GermanicText; Assert.IsTrue(EqualityHelper.ByteArrayAreEquals(result, expectedResult), "UTF8 Encoding of Germanic text failed byte arrays different"); text = "Cosmos είναι υπέροχος!"; - result = Encoder.GetBytes(text); + result = xEncoding.GetBytes(text); expectedResult = UTF8GreekText; Assert.IsTrue(EqualityHelper.ByteArrayAreEquals(result, expectedResult), "UTF8 Encoding of Greek text failed byte arrays different"); text = "Cosmos 素晴らしいです!"; - result = Encoder.GetBytes(text); + result = xEncoding.GetBytes(text); expectedResult = UTF8JapanaseText; Assert.IsTrue(EqualityHelper.ByteArrayAreEquals(result, expectedResult), "UTF8 Encoding of Japanese text failed byte arrays different"); /* This the only case on which UFT-16 must use a surrugate pairs... it is a Gothic letter go figure! 
*/ text = "𐍈"; - result = Encoder.GetBytes(text); + result = xEncoding.GetBytes(text); expectedResult = UTF8GothicText; Assert.IsTrue(EqualityHelper.ByteArrayAreEquals(result, expectedResult), "UTF8 Encoding of Gothic text failed byte arrays different"); /* Now we do the other way: we have a UFT8 byte array and try to convert it in a UFT16 String */ string expectedText; - text = Encoder.GetString(UTF8EnglishText); + text = xEncoding.GetString(UTF8EnglishText); expectedText = "Cosmos is wonderful!"; Assert.IsTrue((text == expectedText), "UTF8 Decoding of English text failed strings different"); - text = Encoder.GetString(UTF8ItalianText); + text = xEncoding.GetString(UTF8ItalianText); expectedText = "Cosmos è fantastico!"; Assert.IsTrue((text == expectedText), "UTF8 Decoding of Italian text failed strings different"); - text = Encoder.GetString(UTF8SpanishText); + text = xEncoding.GetString(UTF8SpanishText); expectedText = "Cosmos es genial!"; Assert.IsTrue((text == expectedText), "UTF8 Decoding of Spanish text failed strings different"); - text = Encoder.GetString(UTF8GermanicText); + text = xEncoding.GetString(UTF8GermanicText); expectedText = "Cosmos ist großartig!"; Assert.IsTrue((text == expectedText), "UTF8 Decoding of Germanic text failed strings different"); - text = Encoder.GetString(UTF8GreekText); + text = xEncoding.GetString(UTF8GreekText); expectedText = "Cosmos είναι υπέροχος!"; Assert.IsTrue((text == expectedText), "UTF8 Decoding of Greek text failed strings different"); - text = Encoder.GetString(UTF8JapanaseText); + text = xEncoding.GetString(UTF8JapanaseText); expectedText = "Cosmos 素晴らしいです!"; Assert.IsTrue((text == expectedText), "UTF8 Decoding of Japanese text failed strings different"); - text = Encoder.GetString(UTF8GothicText); + text = xEncoding.GetString(UTF8GothicText); expectedText = "𐍈"; Assert.IsTrue((text == expectedText), "UTF8 Decoding of Gothic text failed strings different"); - /* But this not work is searching '437' in some native 
Windows tables, we need plugs for this sadly! */ - //Encoder = Encoding.GetEncoding(437); - //text = "àèìòù"; - //result = Encoder.GetBytes(text); - //expectedResult = new byte[] { 0x85, 0x8A, 0x8D, 0x95, 0x97 }; - //Assert.IsTrue(EqualityHelper.ByteArrayAreEquals(result, expectedResult), "CP437 Encoding of accents text failed byte arrays different"); + xEncoding = Encoding.ASCII; + Assert.IsTrue(xEncoding.IsSingleByte, "IsSingleByte failed return false for ASCII"); + + text = "Cosmos is wonderful!"; + result = xEncoding.GetBytes(text); + expectedResult = UTF8EnglishText; + Assert.IsTrue(EqualityHelper.ByteArrayAreEquals(result, expectedResult), "Ascii Encoding of English text failed byte arrays different"); +#endif + + xEncoding = Encoding.GetEncoding(437); + var yEncoding = Encoding.GetEncoding("IBM437"); + + Assert.IsTrue(xEncoding.CodePage == yEncoding.CodePage, "437 and 'IBM437' not the same Encoding"); + + Assert.IsTrue(xEncoding.IsSingleByte, "IsSingleByte failed return false for CP437"); + + text = "Cosmos is wonderful!"; + result = xEncoding.GetBytes(text); + expectedResult = CP437EnglishText; + Assert.IsTrue(EqualityHelper.ByteArrayAreEquals(result, expectedResult), "CP437 Encoding of English text failed byte arrays different"); + + text = "Cosmos è fantastico!"; + result = xEncoding.GetBytes(text); + expectedResult = CP437ItalianText; + Assert.IsTrue(EqualityHelper.ByteArrayAreEquals(result, expectedResult), "CP437 Encoding of Italian text failed byte arrays different"); + + text = "Cosmos es genial!"; + result = xEncoding.GetBytes(text); + expectedResult = CP437SpanishText; + Assert.IsTrue(EqualityHelper.ByteArrayAreEquals(result, expectedResult), "CP437 Encoding of Spanish text failed byte arrays different"); + + text = "Cosmos ist großartig!"; + result = xEncoding.GetBytes(text); + expectedResult = CP437GermanicText; + Assert.IsTrue(EqualityHelper.ByteArrayAreEquals(result, expectedResult), "CP437 Encoding of Germanic text failed byte arrays 
different"); + + /* + * From this point on a lot of characters will be replaced by 0x3F ('?') because + * cannot be really represented on CP437 + */ + text = "Cosmos είναι υπέροχος!"; + result = xEncoding.GetBytes(text); + expectedResult = CP437GreekText; + Assert.IsTrue(EqualityHelper.ByteArrayAreEquals(result, expectedResult), "CP437 Encoding of Greek text failed byte arrays different"); + + text = "Cosmos 素晴らしいです!"; + result = xEncoding.GetBytes(text); + expectedResult = CP437JapanaseText; + Assert.IsTrue(EqualityHelper.ByteArrayAreEquals(result, expectedResult), "CP437 Encoding of Japanese text failed byte arrays different"); + + text = "𐍈"; + result = xEncoding.GetBytes(text); + expectedResult = CP437GothicText; + Assert.IsTrue(EqualityHelper.ByteArrayAreEquals(result, expectedResult), "CP437 Encoding of Gothic text failed byte arrays different"); + + //string expectedText; + /* Now we do the other way: we have a CP437 byte array and try to convert it in a UFT16 String */ + text = xEncoding.GetString(CP437EnglishText); + expectedText = "Cosmos is wonderful!"; + Assert.IsTrue((text == expectedText), "CP437 Decoding of English text failed strings different"); + + text = xEncoding.GetString(CP437ItalianText); + expectedText = "Cosmos è fantastico!"; + Assert.IsTrue((text == expectedText), "CP437 Decoding of Italian text failed strings different"); + + text = xEncoding.GetString(CP437SpanishText); + expectedText = "Cosmos es genial!"; + Assert.IsTrue((text == expectedText), "CP437 Decoding of Spanish text failed strings different"); + + text = xEncoding.GetString(CP437GermanicText); + expectedText = "Cosmos ist großartig!"; + Assert.IsTrue((text == expectedText), "CP437 Decoding of Germanic text failed strings different"); + + /* CP437 replaces not representable characters with '?' */ + text = xEncoding.GetString(CP437GreekText); + expectedText = "Cosmos ε??α? 
?π??????!"; + Assert.IsTrue((text == expectedText), "CP437 Decoding of Greek text failed strings different"); + + text = xEncoding.GetString(CP437JapanaseText); + expectedText = "Cosmos ???????!"; + Assert.IsTrue((text == expectedText), "CP437 Decoding of Japanese text failed strings different"); + + text = xEncoding.GetString(CP437GothicText); + expectedText = "??"; + Assert.IsTrue((text == expectedText), "CP437 Decoding of Gothic text failed strings different"); } +#endif } } diff --git a/source/Cosmos.Core_Plugs/System/StringImpl.cs b/source/Cosmos.Core_Plugs/System/StringImpl.cs index 11b58a5a6..e2853b719 100644 --- a/source/Cosmos.Core_Plugs/System/StringImpl.cs +++ b/source/Cosmos.Core_Plugs/System/StringImpl.cs @@ -83,7 +83,6 @@ namespace Cosmos.Core_Plugs.System } } - public static unsafe void Ctor( string aThis, char aChar, @@ -100,6 +99,19 @@ namespace Cosmos.Core_Plugs.System } } + /* + * These 2 unsafe string Ctor are only "stubs" implemented because Encoding needed them existing but our implementation is not + * using them. 
Sincerely in Cosmos I hope there wi + */ + public unsafe static void Ctor(string aThis, sbyte* aValue) + { + throw new NotImplementedException("String Ctor(sbyte * '\0' terminated)"); + } + + public unsafe static void Ctor(string aThis, sbyte* aValue, int aStartIndex, int aLength) + { + throw new NotImplementedException("String Ctor(sbyte * with lenght)"); + } public static unsafe int get_Length( [ObjectPointerAccess] uint* aThis, diff --git a/source/Cosmos.System2/Text/CP437Encoding.cs b/source/Cosmos.System2/Text/CP437Encoding.cs new file mode 100644 index 000000000..3f88d0df5 --- /dev/null +++ b/source/Cosmos.System2/Text/CP437Encoding.cs @@ -0,0 +1,38 @@ +//#define COSMOSDEBUG +using Cosmos.Debug.Kernel; + +namespace Cosmos.System.ExtendedASCII +{ + internal class CP437Enconding : SingleByteEncoding + { + private static Debugger myDebugger = new Debugger("System", "CP437 Encoding"); + + internal CP437Enconding() + { + myDebugger.SendInternal("CP437Enconding Setting CodePageTable only one time..."); + + CodePageTable = new char[] { + 'Ç', 'ü', 'é', 'â', 'ä', 'à', 'å', 'ç', + 'ê', 'ë', 'è', 'ï', 'î', 'ì', 'Ä', 'Å', + 'É', 'æ', 'Æ', 'ô', 'ö', 'ò', 'û', 'ù', + 'ÿ', 'Ö', 'Ü', '¢', '£', '¥', '₧', 'ƒ', + 'á', 'í', 'ó', 'ú', 'ñ', 'Ñ', 'ª', 'º', + '¿', '⌐', '¬', '½', '¼', '¡', '«', '»', + '░', '▒', '▓', '│', '┤', '╡', '╢', '╖', + '╕', '╣', '║', '╗', '╝', '╜', '╛', '┐', + '└', '┴', '┬', '├', '─', '┼', '╞', '╟', + '╚', '╔', '╩', '╦','╠', '═', '╬', '╧', + '╨', '╤', '╥', '╙', '╘', '╒', '╓','╫', + '╪', '┘', '┌', '█', '▄', '▌', '▐', '▀', + 'α', 'ß', 'Γ', 'π', 'Σ', 'σ', 'µ', 'τ', + 'Φ', 'Θ', 'Ω', 'δ', '∞', 'φ', 'ε', '∩', + '≡', '±', '≥', '≤', '⌠', '⌡', '÷', '≈', + '°', '∙', '·', '√', 'ⁿ', '²', '■', '\x00A0' + }; + } + + public override string BodyName => "IBM437"; + + public override int CodePage => 437; + } +} diff --git a/source/Cosmos.System2/Text/CP858Encoding.cs b/source/Cosmos.System2/Text/CP858Encoding.cs new file mode 100644 index 000000000..1919bb41e --- /dev/null +++ 
b/source/Cosmos.System2/Text/CP858Encoding.cs
@@ -0,0 +1,31 @@
+//#define COSMOSDEBUG
+using Cosmos.Debug.Kernel;
+
+namespace Cosmos.System.ExtendedASCII
+{
+    internal class CP858Enconding : SingleByteEncoding // supplies the 0x80-0xFF half of code page 858
+    {
+        private static Debugger myDebugger = new Debugger("System", "CP858 Encoding");
+
+        internal CP858Enconding()
+        {
+            myDebugger.SendInternal($"CP858Enconding Setting CodePageTable only one time...");
+
+            CodePageTable = new char[] // entry i is the character for byte value 128 + i
+            {
+                'Ç', 'ü', 'é', 'â', 'ä', 'à', 'å', 'ç', 'ê', 'ë', 'è', 'ï', 'î', 'ì', 'Ä', 'Å', // 0x80-0x8F
+                'É', 'æ', 'Æ', 'ô', 'ö', 'ò', 'û', 'ù', 'ÿ', 'Ö', 'Ü', 'ø', '£', 'Ø', '×', 'ƒ', // 0x90-0x9F
+                'á', 'í', 'ó', 'ú', 'ñ', 'Ñ', 'ª', 'º', '¿', '®', '¬', '½', '¼', '¡', '«', '»', // 0xA0-0xAF
+                '░', '▒', '▓', '│', '┤', 'Á', 'Â', 'À', '©', '╣', '║', '╗', '╝', '¢', '¥', '┐', // 0xB0-0xBF
+                '└', '┴', '┬', '├', '─', '┼', 'ã', 'Ã', '╚', '╔', '╩', '╦', '╠', '═', '╬', '¤', // 0xC0-0xCF
+                'ð', 'Ð', 'Ê', 'Ë', 'È', '€', 'Í', 'Î', 'Ï', '┘', '┌', '█', '▄', '¦', 'Ì', '▀', // 0xD0-0xDF
+                'Ó', 'ß', 'Ô', 'Ò', 'õ', 'Õ', 'µ', 'þ', 'Þ', 'Ú', 'Û', 'Ù', 'ý', 'Ý', '¯', '´', // 0xE0-0xEF
+                '\u00AD', '±', '‗', '¾', '¶', '§', '÷', '¸', '°', '¨', '·', '¹', '³', '²', '■', '\u00A0' // 0xF0-0xFF
+            };
+        }
+
+        public override string BodyName => "IBM00858";
+
+        public override int CodePage => 858; // must be the id this encoding is registered under (858), not CP437's
+    }
+}
diff --git a/source/Cosmos.System2/Text/CosmosEncodingProvider.cs b/source/Cosmos.System2/Text/CosmosEncodingProvider.cs
new file mode 100644
index 000000000..c9194ce45
--- /dev/null
+++ b/source/Cosmos.System2/Text/CosmosEncodingProvider.cs
@@ -0,0 +1,39 @@
+#define COSMOSDEBUG
+using System.Text;
+using Cosmos.Debug.Kernel;
+
+namespace Cosmos.System.ExtendedASCII
+{
+    public class CosmosEncodingProvider : EncodingProvider
+    {
+        private static readonly EncodingProvider s_singleton = new CosmosEncodingProvider();
+        private static Debugger myDebugger = new Debugger("System", "CosmosEncodingProvider");
+
+        internal CosmosEncodingProvider() { }
+
+        public static EncodingProvider Instance
+        {
+            get { return s_singleton; }
+        }
+
+        public override Encoding
GetEncoding(int codepage) + { + myDebugger.SendInternal($"Getting Encoding for codepage {codepage}"); + if (codepage < 0 || codepage > 65535) + return null; + + /* Let's check on our EncodingTable, if codepage is not found null is returned */ + return EncodingTable.GetEncoding(codepage); + } + + public override Encoding GetEncoding(string name) + { + myDebugger.SendInternal($"Getting Encoding for codepage with name {name}"); + int codepage = EncodingTable.GetCodePageFromDesc(name); + if (codepage == -1) + return null; + + return GetEncoding(codepage); + } + } +} diff --git a/source/Cosmos.System2/Text/EncodingTable.cs b/source/Cosmos.System2/Text/EncodingTable.cs new file mode 100644 index 000000000..ae8ccf881 --- /dev/null +++ b/source/Cosmos.System2/Text/EncodingTable.cs @@ -0,0 +1,60 @@ +#define COSMOSDEBUG +using System.Text; +using Cosmos.Debug.Kernel; + +namespace Cosmos.System.ExtendedASCII +{ + /* + * Ideally we should use Dictionary or HashTable here but are yet not working in Cosmos so I have done + * this replacement class for now... 
+     */
+    internal static class EncodingTable // fixed-size array indexed directly by code page id
+    {
+        private static Debugger myDebugger = new Debugger("System", "EncodingTable");
+
+        static EncodingTable()
+        {
+            myDebugger.SendInternal("Inizializing Encoding Table");
+
+            Add(437, "IBM437", new CP437Enconding());
+            Add(858, "IBM00858", new CP858Enconding()); // name must equal CP858Enconding.BodyName so lookup by name works
+        }
+
+        private struct values
+        {
+            public string desc;
+            public Encoding encoding;
+
+            public values(string desc, Encoding encoding)
+            {
+                this.desc = desc;
+                this.encoding = encoding;
+            }
+        };
+
+        const int MaxCodepageChacheSize = 2048; // code pages >= this value cannot be stored and are reported as unknown
+        static values[] CodepageCache = new values[MaxCodepageChacheSize];
+
+        public static void Add(int codepage, string desc, Encoding encoding)
+        {
+            myDebugger.SendInternal($"Adding codepage {codepage} desc {desc}");
+            CodepageCache[codepage] = new values(desc, encoding);
+        }
+
+        public static string GetDescription(int codepage) => (uint)codepage < MaxCodepageChacheSize ? CodepageCache[codepage].desc : null; // null when outside the table
+
+        public static Encoding GetEncoding(int codepage) => (uint)codepage < MaxCodepageChacheSize ? CodepageCache[codepage].encoding : null; // null (not an exception) for ids the table cannot hold
+
+        public static int GetCodePageFromDesc(string desc)
+        {
+            for (int idx = 0; idx < MaxCodepageChacheSize; idx++)
+            {
+                if (desc != null && CodepageCache[idx].desc == desc) // empty slots have a null desc and must never match
+                    return idx;
+            }
+
+            /* Not found! 
*/
+            return -1;
+        }
+    }
+}
diff --git a/source/Cosmos.System2/Text/SingleByteEncoding.cs b/source/Cosmos.System2/Text/SingleByteEncoding.cs
new file mode 100644
index 000000000..1a800d457
--- /dev/null
+++ b/source/Cosmos.System2/Text/SingleByteEncoding.cs
@@ -0,0 +1,187 @@
+//#define COSMOSDEBUG
+using System;
+using System.Text;
+using Cosmos.Debug.Kernel;
+
+namespace Cosmos.System.ExtendedASCII
+{
+    internal class SingleByteEncoding : Encoding
+    {
+        private static Debugger mDebugger = new Debugger("System", "SingleByteEncoding");
+
+        internal char[] CodePageTable { get; set; } // characters for byte values 128..255; set by the derived class constructor
+        private const byte ReplacementChar = (byte)'?'; // emitted for characters that are not in CodePageTable
+
+        public override bool IsSingleByte => true;
+
+        public override int GetByteCount(char[] chars, int index, int count)
+        {
+            mDebugger.SendInternal($"GetByteCount of chars {new string(chars)} index {index} count {count}");
+            // Validate input parameters
+            if (chars == null)
+                throw new ArgumentNullException("chars", "Null Array");
+
+            if (index < 0 || count < 0)
+                throw new ArgumentOutOfRangeException((index < 0 ? "index" : "count"), "negative number");
+
+            if (chars.Length - index < count)
+                throw new ArgumentOutOfRangeException("chars", "count more that what is in array");
+
+            // If no input, return 0, avoid fixed empty array problem
+            if (count == 0)
+                return 0;
+
+            // If no input just return 0, fixed doesn't like 0 length arrays
+            if (count == 0)
+                return 0;
+
+            // Every char becomes exactly one byte, so the answer is the size of the range wherever it starts
+            return count;
+        }
+
+        private int GetCodePageIdxFromChr(char ch)
+        {
+            int idx;
+
+            /* IL2CPU bug again with interfaces :-( let's do it manually... */
+            //idx = Array.IndexOf(CodePageTable, ch);
+
+            for (idx = 0; idx < CodePageTable.Length; idx++)
+            {
+                if (CodePageTable[idx] == ch)
+                    break;
+            }
+
+            // All CodePageTable searched, nothing found! 
+            if (idx == CodePageTable.Length)
+                return -1;
+
+            return idx + 128; // table entry i stands for byte value 128 + i
+        }
+
+        private byte GetByte(char ch)
+        {
+            //mDebugger.SendInternal($"Converting to CodePageTable ch {ch} (codepoint) {(int)ch}");
+
+            /* ch is in reality an ASCII character? (0..127, DEL included) */
+            if (ch < 128)
+            {
+                mDebugger.SendInternal($"ch {ch} is ASCII");
+                return (byte)ch;
+            }
+
+            mDebugger.SendInternal($"ch {ch} could be Extended Ascii");
+            int idx = GetCodePageIdxFromChr(ch);
+            if (idx == -1)
+            {
+                mDebugger.SendInternal($"ch {ch} not in CodePageTable replaced with {(char)ReplacementChar}");
+                return ReplacementChar;
+            }
+
+            mDebugger.SendInternal($"ch {ch} is CodePageTable {idx}");
+            return (byte)idx;
+        }
+
+        public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)
+        {
+            mDebugger.SendInternal($"GetBytes of chars {new string(chars)} index {charIndex} count {charCount}");
+            // Validate input parameters
+            if (chars == null)
+                throw new ArgumentNullException("chars", "Null Array");
+
+            if (charIndex < 0 || charCount < 0)
+                throw new ArgumentOutOfRangeException((charIndex < 0 ? "charIndex" : "charCount"), "negative number");
+
+            if (chars.Length - charIndex < charCount)
+                throw new ArgumentOutOfRangeException("chars", "count more that what is in array");
+
+            mDebugger.SendInternal($"Converting to CodePageTable: {new String(chars)}");
+
+            for (int i = 0; i < charCount; i++) // i is relative to both start offsets
+            {
+                bytes[byteIndex + i] = GetByte(chars[charIndex + i]);
+            }
+
+            mDebugger.SendInternal($"So as bytes we have {BitConverter.ToString(bytes)}");
+            return charCount; // number of bytes actually written: one per input char
+        }
+
+        public override int GetCharCount(byte[] bytes, int index, int count)
+        {
+            mDebugger.SendInternal($"GetCharCount of bytes {BitConverter.ToString(bytes)} index {index} count {count}");
+            // Validate Parameters
+            if (bytes == null)
+                throw new ArgumentNullException("bytes", "Null Array");
+
+            if (index < 0 || count < 0)
+                throw new ArgumentOutOfRangeException((index < 0 ? 
"index" : "count"), "negative number");
+
+            if (bytes.Length - index < count)
+                throw new ArgumentOutOfRangeException("bytes", "count more that what is in array");
+
+            // If no input just return 0, fixed doesn't like 0 length arrays
+            if (count == 0)
+                return 0;
+
+            return count; // every byte decodes to exactly one char, wherever the range starts
+        }
+
+        private char GetChar(byte b)
+        {
+            mDebugger.SendInternal($"Converting to UTF16: {b}...");
+
+            /* Ascii (0..127)? Simply cast it then... anything else indexes CodePageTable from 128 */
+            if (b < 128)
+            {
+                mDebugger.SendInternal($"b {b} is ASCII");
+                return (char)b;
+            }
+
+            mDebugger.SendInternal($"b in Extended ASCII");
+            return CodePageTable[b - 128];
+        }
+
+        public override int GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex)
+        {
+            mDebugger.SendInternal($"Converting to UTF16: {BitConverter.ToString(bytes)}...");
+            // Validate Parameters
+            if (bytes == null)
+                throw new ArgumentNullException("bytes", "Null Array");
+
+            if (byteIndex < 0 || byteCount < 0)
+                throw new ArgumentOutOfRangeException((byteIndex < 0 ? "byteIndex" : "byteCount"), "negative number");
+
+            if (bytes.Length - byteIndex < byteCount)
+                throw new ArgumentOutOfRangeException("bytes", "count more that what is in array");
+
+            // If no input just return 0, fixed doesn't like 0 length arrays
+            if (byteCount == 0)
+                return 0;
+
+            for (int i = 0; i < byteCount; i++) // i is relative to both start offsets
+            {
+                chars[charIndex + i] = GetChar(bytes[byteIndex + i]);
+            }
+
+            mDebugger.SendInternal($"So as chars we have {new String(chars)}");
+
+            return byteCount; // number of chars actually written: one per input byte
+        }
+
+        public override int GetMaxByteCount(int charCount)
+        {
+            if (charCount < 0)
+                throw new ArgumentOutOfRangeException(nameof(charCount),
+                    "negative number");
+
+            // Characters would be # of characters + 1 in case high surrogate is ? 
* max fallback + return charCount + 1; + } + + public override int GetMaxCharCount(int byteCount) + { + // Just return length, SBCS stay the same length because they don't map to surrogate + return byteCount; + } + } +} diff --git a/source/Cosmos.System2_Plugs/System/Globalization/EncodingTableImpl.cs b/source/Cosmos.System2_Plugs/System/Globalization/EncodingTableImpl.cs new file mode 100644 index 000000000..99af0055b --- /dev/null +++ b/source/Cosmos.System2_Plugs/System/Globalization/EncodingTableImpl.cs @@ -0,0 +1,31 @@ +using Cosmos.Debug.Kernel; +using Cosmos.System2_Plugs.System.Text; +using IL2CPU.API.Attribs; +using System; +using System.Collections; +using System.Text; + +namespace Cosmos.System2_Plugs.System.Globalization +{ + [Plug(TargetName = "System.Globalization.EncodingTable")] + public static class EncodingTableImpl + { + /* + * This is Table is pratically empty in Net Core, but instatiate a Dictionary that Cosmos yet does not + * support when it will support them probably this plug will be not needed anymore. + */ + public static void Cctor() + { + } + + public static object GetCodePageDataItem(int codepage) + { + return null; + } + + public static int GetCodePageFromName(string name) + { + return -1; + } + } +}