mirror of
https://github.com/danbulant/Cosmos
synced 2026-06-11 18:51:41 +00:00
Added CP437 and CP858 (DOS Latin1) support to Encoding, CP858 is not working for VMT bugs.
This commit is contained in:
parent
e06efd8bcd
commit
ff78034a1c
8 changed files with 664 additions and 25 deletions
|
|
@ -1,13 +1,18 @@
|
|||
using System;
|
||||
#define COSMOSDEBUG
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text;
|
||||
using Cosmos.TestRunner;
|
||||
using Cosmos.Compiler.Tests.Bcl.Helper;
|
||||
using Cosmos.Debug.Kernel;
|
||||
using Cosmos.System.ExtendedASCII;
|
||||
|
||||
namespace Cosmos.Compiler.Tests.Bcl.System
|
||||
{
|
||||
class EncodingTest
|
||||
{
|
||||
static Debugger mDebugger = new Debugger("System", "Enconding Test");
|
||||
|
||||
static byte[] UTF8EnglishText = new byte[] { 0x43, 0x6F, 0x73, 0x6D, 0x6F, 0x73, 0x20, 0x69, 0x73, 0x20,
|
||||
0x77, 0x6F, 0x6E, 0x64, 0x65, 0x72, 0x66, 0x75, 0x6C, 0x21 };
|
||||
static byte[] UTF8ItalianText = new byte[] { 0x43, 0x6F, 0x73, 0x6D, 0x6F, 0x73, 0x20, 0xC3, 0xA8, 0x20,
|
||||
|
|
@ -26,90 +31,326 @@ namespace Cosmos.Compiler.Tests.Bcl.System
|
|||
0xE6, 0x99, 0xB4, 0xE3, 0x82, 0x89, 0xE3, 0x81, 0x97, 0xE3,
|
||||
0x81, 0x84, 0xE3, 0x81, 0xA7, 0xE3, 0x81, 0x99, 0x21 };
|
||||
static byte[] UTF8GothicText = new byte[] { 0xF0, 0x90, 0x8D, 0x88 };
|
||||
static byte[] CP437EnglishText = new byte[] { 0x43, 0x6F, 0x73, 0x6D, 0x6F, 0x73, 0x20, 0x69, 0x73, 0x20,
|
||||
0x77, 0x6F, 0x6E, 0x64, 0x65, 0x72, 0x66, 0x75, 0x6C, 0x21 };
|
||||
static byte[] CP437ItalianText = new byte[] { 0x43, 0x6F, 0x73, 0x6D, 0x6F, 0x73, 0x20, 0x8A, 0x20, 0x66,
|
||||
0x61, 0x6E, 0x74, 0x61, 0x73, 0x74, 0x69, 0x63, 0x6F, 0x21 };
|
||||
|
||||
static byte[] CP437SpanishText = new byte[] { 0x43, 0x6F, 0x73, 0x6D, 0x6F, 0x73, 0x20, 0x65, 0x73, 0x20,
|
||||
0x67, 0x65, 0x6E, 0x69, 0x61, 0x6C, 0x21 };
|
||||
|
||||
static byte[] CP437GermanicText = new byte[] { 0x43, 0x6F, 0x73, 0x6D, 0x6F, 0x73, 0x20, 0x69, 0x73, 0x74, 0x20,
|
||||
0x67, 0x72, 0x6F, 0xE1, 0x61, 0x72, 0x74, 0x69, 0x67, 0x21 };
|
||||
static byte[] CP437GreekText = new byte[] { 0x43, 0x6F, 0x73, 0x6D, 0x6F, 0x73, 0x20, 0xEE, 0x3F, 0x3F, 0xE0,
|
||||
0x3F, 0x20, 0x3F, 0xE3, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x21 };
|
||||
static byte[] CP437JapanaseText = new byte[] { 0x43, 0x6F, 0x73, 0x6D, 0x6F, 0x73, 0x20, 0x3F, 0x3F, 0x3F, 0x3F,
|
||||
0x3F, 0x3F, 0x3F, 0x21 };
|
||||
static byte[] CP437GothicText = new byte[] { 0x3F, 0x3F };
|
||||
static byte[] CP858EnglishText = CP437EnglishText;
|
||||
static byte[] CP858ItalianText = CP437ItalianText;
|
||||
static byte[] CP858SpanishText = CP437SpanishText;
|
||||
static byte[] CP858GermanicText = CP437GermanicText;
|
||||
/* CP858 has no Greek characters they are all replaced by '?' (0x3F) */
|
||||
static byte[] CP858GreekText = new byte[] { 0x43, 0x6F, 0x73, 0x6D, 0x6F, 0x73, 0x20, 0x3F, 0x3F, 0x3F, 0x3F,
|
||||
0x3F, 0x20, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x21 };
|
||||
static byte[] CP858JapanaseText = CP437JapanaseText;
|
||||
static byte[] CP858GothicText = CP437GothicText;
|
||||
|
||||
/// <summary>
/// Encodes <paramref name="text"/> with <paramref name="xEncoding"/> and asserts that the
/// produced bytes are equal to <paramref name="expectedResult"/>.
/// </summary>
/// <param name="xEncoding">Encoding under test.</param>
/// <param name="text">String to encode.</param>
/// <param name="expectedResult">Bytes the encoding is expected to produce.</param>
/// <param name="desc">Language label used only in the failure message.</param>
static void TestGetBytes(Encoding xEncoding, string text, byte[] expectedResult, string desc)
{
    byte[] xEncoded = xEncoding.GetBytes(text);
    bool xSameBytes = EqualityHelper.ByteArrayAreEquals(xEncoded, expectedResult);

    Assert.IsTrue(xSameBytes, $"{xEncoding.BodyName} Encoding of {desc} text failed byte arrays different");
}
|
||||
|
||||
|
||||
/// <summary>
/// Decodes <paramref name="bytes"/> with <paramref name="xEncoding"/> and asserts that the
/// resulting string is equal to <paramref name="expectedText"/>.
/// </summary>
/// <param name="xEncoding">Encoding under test.</param>
/// <param name="bytes">Encoded bytes to decode.</param>
/// <param name="expectedText">String the decoding is expected to produce.</param>
/// <param name="desc">Language label used only in the failure message.</param>
static void TestGetString(Encoding xEncoding, byte[] bytes, string expectedText, string desc)
{
    string xDecoded = xEncoding.GetString(bytes);
    bool xSameText = xDecoded == expectedText;

    Assert.IsTrue(xSameText, $"{xEncoding.BodyName} Decoding of {desc} text failed strings different");
}
|
||||
|
||||
/// <summary>
/// Round-trips the sample sentences through <see cref="UTF8Encoding"/>: first every string is
/// encoded and compared with its expected byte array, then every byte array is decoded and
/// compared with the original string.
/// </summary>
static void TestUTF8()
{
    Encoding utf8 = new UTF8Encoding();

    mDebugger.SendInternal($"Starting Test {utf8.BodyName} Encoding / Decoding");

    /* String -> bytes */
    TestGetBytes(utf8, "Cosmos is wonderful!", UTF8EnglishText, "English");
    TestGetBytes(utf8, "Cosmos è fantastico!", UTF8ItalianText, "Italian");
    TestGetBytes(utf8, "Cosmos es genial!", UTF8SpanishText, "Spanish");
    TestGetBytes(utf8, "Cosmos ist großartig!", UTF8GermanicText, "Germanic");
    TestGetBytes(utf8, "Cosmos είναι υπέροχος!", UTF8GreekText, "Greek");
    TestGetBytes(utf8, "Cosmos 素晴らしいです!", UTF8JapanaseText, "Japanese");
    /* The Gothic letter needs a UTF-16 surrogate pair */
    TestGetBytes(utf8, "𐍈", UTF8GothicText, "Gothic");

    /* Bytes -> string: the decoded text is expected to match the original exactly */
    TestGetString(utf8, UTF8EnglishText, "Cosmos is wonderful!", "English");
    TestGetString(utf8, UTF8ItalianText, "Cosmos è fantastico!", "Italian");
    TestGetString(utf8, UTF8SpanishText, "Cosmos es genial!", "Spanish");
    TestGetString(utf8, UTF8GermanicText, "Cosmos ist großartig!", "Germanic");
    TestGetString(utf8, UTF8GreekText, "Cosmos είναι υπέροχος!", "Greek");
    TestGetString(utf8, UTF8JapanaseText, "Cosmos 素晴らしいです!", "Japanese");
    TestGetString(utf8, UTF8GothicText, "𐍈", "Gothic");

    mDebugger.SendInternal($"Finished Test {utf8.BodyName} Encoding / Decoding");
}
|
||||
|
||||
/// <summary>
/// Exercises the code page 437 encoding obtained through <c>Encoding.GetEncoding(437)</c>:
/// every sample sentence is encoded and compared with its expected CP437 bytes, then the
/// bytes are decoded back and compared with the expected string.
/// </summary>
static void TestCP437()
{
    Encoding xEncoding = Encoding.GetEncoding(437);

    mDebugger.SendInternal($"Starting Test {xEncoding.BodyName} Encoding / Decoding");

    TestGetBytes(xEncoding, "Cosmos is wonderful!", CP437EnglishText, "English");
    TestGetBytes(xEncoding, "Cosmos è fantastico!", CP437ItalianText, "Italian");
    TestGetBytes(xEncoding, "Cosmos es genial!", CP437SpanishText, "Spanish");
    TestGetBytes(xEncoding, "Cosmos ist großartig!", CP437GermanicText, "Germanic");
    /*
     * From this point on a lot of characters will be replaced by 0x3F ('?') because
     * they cannot be represented in CP437.
     */
    TestGetBytes(xEncoding, "Cosmos είναι υπέροχος!", CP437GreekText, "Greek");
    TestGetBytes(xEncoding, "Cosmos 素晴らしいです!", CP437JapanaseText, "Japanese");
    TestGetBytes(xEncoding, "𐍈", CP437GothicText, "Gothic");

    TestGetString(xEncoding, CP437EnglishText, "Cosmos is wonderful!", "English");
    TestGetString(xEncoding, CP437ItalianText, "Cosmos è fantastico!", "Italian");
    TestGetString(xEncoding, CP437SpanishText, "Cosmos es genial!", "Spanish");
    TestGetString(xEncoding, CP437GermanicText, "Cosmos ist großartig!", "Germanic");
    /*
     * The byte arrays already contain '?' for everything CP437 cannot represent, so the
     * decoded strings contain '?' in those positions too.
     */
    TestGetString(xEncoding, CP437GreekText, "Cosmos ε??α? ?π??????!", "Greek");
    TestGetString(xEncoding, CP437JapanaseText, "Cosmos ???????!", "Japanese");
    TestGetString(xEncoding, CP437GothicText, "??", "Gothic");

    /* The '$' prefix was missing here, so the placeholder was logged literally. */
    mDebugger.SendInternal($"Finished Test {xEncoding.BodyName} Encoding / Decoding");
}
|
||||
|
||||
/// <summary>
/// Exercises the code page 858 encoding obtained through <c>Encoding.GetEncoding(858)</c>:
/// every sample sentence is encoded and compared with its expected CP858 bytes, then the
/// bytes are decoded back and compared with the expected string.
/// </summary>
static void TestCP858()
{
    Encoding xEncoding = Encoding.GetEncoding(858);

    mDebugger.SendInternal($"Starting Test {xEncoding.BodyName} Encoding / Decoding");

    TestGetBytes(xEncoding, "Cosmos is wonderful!", CP858EnglishText, "English");
    TestGetBytes(xEncoding, "Cosmos è fantastico!", CP858ItalianText, "Italian");
    TestGetBytes(xEncoding, "Cosmos es genial!", CP858SpanishText, "Spanish");
    TestGetBytes(xEncoding, "Cosmos ist großartig!", CP858GermanicText, "Germanic");
    /*
     * From this point on a lot of characters will be replaced by 0x3F ('?') because
     * they cannot be represented in CP858.
     */
    TestGetBytes(xEncoding, "Cosmos είναι υπέροχος!", CP858GreekText, "Greek");
    TestGetBytes(xEncoding, "Cosmos 素晴らしいです!", CP858JapanaseText, "Japanese");
    TestGetBytes(xEncoding, "𐍈", CP858GothicText, "Gothic");

    TestGetString(xEncoding, CP858EnglishText, "Cosmos is wonderful!", "English");
    TestGetString(xEncoding, CP858ItalianText, "Cosmos è fantastico!", "Italian");
    TestGetString(xEncoding, CP858SpanishText, "Cosmos es genial!", "Spanish");
    TestGetString(xEncoding, CP858GermanicText, "Cosmos ist großartig!", "Germanic");
    /*
     * The byte arrays already contain '?' for everything CP858 cannot represent, so the
     * decoded strings contain '?' in those positions too.
     */
    TestGetString(xEncoding, CP858GreekText, "Cosmos ????? ????????!", "Greek");
    TestGetString(xEncoding, CP858JapanaseText, "Cosmos ???????!", "Japanese");
    TestGetString(xEncoding, CP858GothicText, "??", "Gothic");

    /* Use BodyName like the start message, so start/finish log lines can be paired. */
    mDebugger.SendInternal($"Finished Test {xEncoding.BodyName} Encoding / Decoding");
}
|
||||
|
||||
/// <summary>
/// Entry point of the Encoding tests: registers the Cosmos encoding provider and runs the
/// code page tests that are currently enabled.
/// </summary>
public static void Execute()
{
    /*
     * .NET Core has removed all the legacy code pages from Encoding; only Unicode and ASCII
     * are supported out of the box. The correct way to add them back is an EncodingProvider.
     * Microsoft ships CodePagesEncodingProvider for this, but it is too complex to use in
     * Cosmos for now (we should surely use it in the future), so CosmosEncodingProvider is a
     * simpler (but less efficient) replacement.
     */
    Encoding.RegisterProvider(CosmosEncodingProvider.Instance);

    /* Currently disabled tests */
    //TestUTF8();
    // TestAscii();

    TestCP437();
    TestCP858();
}
|
||||
|
||||
#if false
|
||||
public static void Execute()
|
||||
{
|
||||
/*
|
||||
* Net Core has removed all the legacy codepages from Encoding, only Unicode and ASCII are supported
|
||||
* the correct way to add them is to create an Encoding Provider.
|
||||
* Microsoft has created a CodePageEncodingProvider for this but it is too much complex to use it in
|
||||
* Cosmos now, but we should use surely this in future.
|
||||
* As a replacement for it I have created CosmosEncodingProvider that is more simple (but less efficient).
|
||||
*/
|
||||
Encoding.RegisterProvider(CosmosEncodingProvider.Instance);
|
||||
|
||||
Encoding xEncoding = new UTF8Encoding();
|
||||
string text;
|
||||
byte[] result;
|
||||
byte[] expectedResult;
|
||||
|
||||
Assert.IsTrue(!xEncoding.IsSingleByte, "IsSingleByte failed return true for UTF8");
|
||||
|
||||
#if true
|
||||
text = "Cosmos is wonderful!";
|
||||
result = Encoder.GetBytes(text);
|
||||
result = xEncoding.GetBytes(text);
|
||||
expectedResult = UTF8EnglishText;
|
||||
Assert.IsTrue(EqualityHelper.ByteArrayAreEquals(result, expectedResult), "UTF8 Encoding of English text failed byte arrays different");
|
||||
|
||||
text = "Cosmos è fantastico!";
|
||||
result = Encoder.GetBytes(text);
|
||||
result = xEncoding.GetBytes(text);
|
||||
expectedResult = UTF8ItalianText;
|
||||
Assert.IsTrue(EqualityHelper.ByteArrayAreEquals(result, expectedResult), "UTF8 Encoding of Italian text failed byte arrays different");
|
||||
|
||||
text = "Cosmos es genial!";
|
||||
result = Encoder.GetBytes(text);
|
||||
result = xEncoding.GetBytes(text);
|
||||
expectedResult = UTF8SpanishText;
|
||||
Assert.IsTrue(EqualityHelper.ByteArrayAreEquals(result, expectedResult), "UTF8 Encoding of Spanish text failed byte arrays different");
|
||||
|
||||
text = "Cosmos ist großartig!";
|
||||
result = Encoder.GetBytes(text);
|
||||
result = xEncoding.GetBytes(text);
|
||||
expectedResult = UTF8GermanicText;
|
||||
Assert.IsTrue(EqualityHelper.ByteArrayAreEquals(result, expectedResult), "UTF8 Encoding of Germanic text failed byte arrays different");
|
||||
|
||||
text = "Cosmos είναι υπέροχος!";
|
||||
result = Encoder.GetBytes(text);
|
||||
result = xEncoding.GetBytes(text);
|
||||
expectedResult = UTF8GreekText;
|
||||
Assert.IsTrue(EqualityHelper.ByteArrayAreEquals(result, expectedResult), "UTF8 Encoding of Greek text failed byte arrays different");
|
||||
|
||||
text = "Cosmos 素晴らしいです!";
|
||||
result = Encoder.GetBytes(text);
|
||||
result = xEncoding.GetBytes(text);
|
||||
expectedResult = UTF8JapanaseText;
|
||||
Assert.IsTrue(EqualityHelper.ByteArrayAreEquals(result, expectedResult), "UTF8 Encoding of Japanese text failed byte arrays different");
|
||||
|
||||
/* This the only case on which UFT-16 must use a surrugate pairs... it is a Gothic letter go figure! */
|
||||
text = "𐍈";
|
||||
result = Encoder.GetBytes(text);
|
||||
result = xEncoding.GetBytes(text);
|
||||
expectedResult = UTF8GothicText;
|
||||
Assert.IsTrue(EqualityHelper.ByteArrayAreEquals(result, expectedResult), "UTF8 Encoding of Gothic text failed byte arrays different");
|
||||
|
||||
/* Now we do the other way: we have a UFT8 byte array and try to convert it in a UFT16 String */
|
||||
string expectedText;
|
||||
|
||||
text = Encoder.GetString(UTF8EnglishText);
|
||||
text = xEncoding.GetString(UTF8EnglishText);
|
||||
expectedText = "Cosmos is wonderful!";
|
||||
Assert.IsTrue((text == expectedText), "UTF8 Decoding of English text failed strings different");
|
||||
|
||||
text = Encoder.GetString(UTF8ItalianText);
|
||||
text = xEncoding.GetString(UTF8ItalianText);
|
||||
expectedText = "Cosmos è fantastico!";
|
||||
Assert.IsTrue((text == expectedText), "UTF8 Decoding of Italian text failed strings different");
|
||||
|
||||
text = Encoder.GetString(UTF8SpanishText);
|
||||
text = xEncoding.GetString(UTF8SpanishText);
|
||||
expectedText = "Cosmos es genial!";
|
||||
Assert.IsTrue((text == expectedText), "UTF8 Decoding of Spanish text failed strings different");
|
||||
|
||||
text = Encoder.GetString(UTF8GermanicText);
|
||||
text = xEncoding.GetString(UTF8GermanicText);
|
||||
expectedText = "Cosmos ist großartig!";
|
||||
Assert.IsTrue((text == expectedText), "UTF8 Decoding of Germanic text failed strings different");
|
||||
|
||||
text = Encoder.GetString(UTF8GreekText);
|
||||
text = xEncoding.GetString(UTF8GreekText);
|
||||
expectedText = "Cosmos είναι υπέροχος!";
|
||||
Assert.IsTrue((text == expectedText), "UTF8 Decoding of Greek text failed strings different");
|
||||
|
||||
text = Encoder.GetString(UTF8JapanaseText);
|
||||
text = xEncoding.GetString(UTF8JapanaseText);
|
||||
expectedText = "Cosmos 素晴らしいです!";
|
||||
Assert.IsTrue((text == expectedText), "UTF8 Decoding of Japanese text failed strings different");
|
||||
|
||||
text = Encoder.GetString(UTF8GothicText);
|
||||
text = xEncoding.GetString(UTF8GothicText);
|
||||
expectedText = "𐍈";
|
||||
Assert.IsTrue((text == expectedText), "UTF8 Decoding of Gothic text failed strings different");
|
||||
|
||||
/* But this not work is searching '437' in some native Windows tables, we need plugs for this sadly! */
|
||||
//Encoder = Encoding.GetEncoding(437);
|
||||
//text = "àèìòù";
|
||||
//result = Encoder.GetBytes(text);
|
||||
//expectedResult = new byte[] { 0x85, 0x8A, 0x8D, 0x95, 0x97 };
|
||||
//Assert.IsTrue(EqualityHelper.ByteArrayAreEquals(result, expectedResult), "CP437 Encoding of accents text failed byte arrays different");
|
||||
xEncoding = Encoding.ASCII;
|
||||
|
||||
Assert.IsTrue(xEncoding.IsSingleByte, "IsSingleByte failed return false for ASCII");
|
||||
|
||||
text = "Cosmos is wonderful!";
|
||||
result = xEncoding.GetBytes(text);
|
||||
expectedResult = UTF8EnglishText;
|
||||
Assert.IsTrue(EqualityHelper.ByteArrayAreEquals(result, expectedResult), "Ascii Encoding of English text failed byte arrays different");
|
||||
#endif
|
||||
|
||||
xEncoding = Encoding.GetEncoding(437);
|
||||
var yEncoding = Encoding.GetEncoding("IBM437");
|
||||
|
||||
Assert.IsTrue(xEncoding.CodePage == yEncoding.CodePage, "437 and 'IBM437' not the same Encoding");
|
||||
|
||||
Assert.IsTrue(xEncoding.IsSingleByte, "IsSingleByte failed return false for CP437");
|
||||
|
||||
text = "Cosmos is wonderful!";
|
||||
result = xEncoding.GetBytes(text);
|
||||
expectedResult = CP437EnglishText;
|
||||
Assert.IsTrue(EqualityHelper.ByteArrayAreEquals(result, expectedResult), "CP437 Encoding of English text failed byte arrays different");
|
||||
|
||||
text = "Cosmos è fantastico!";
|
||||
result = xEncoding.GetBytes(text);
|
||||
expectedResult = CP437ItalianText;
|
||||
Assert.IsTrue(EqualityHelper.ByteArrayAreEquals(result, expectedResult), "CP437 Encoding of Italian text failed byte arrays different");
|
||||
|
||||
text = "Cosmos es genial!";
|
||||
result = xEncoding.GetBytes(text);
|
||||
expectedResult = CP437SpanishText;
|
||||
Assert.IsTrue(EqualityHelper.ByteArrayAreEquals(result, expectedResult), "CP437 Encoding of Spanish text failed byte arrays different");
|
||||
|
||||
text = "Cosmos ist großartig!";
|
||||
result = xEncoding.GetBytes(text);
|
||||
expectedResult = CP437GermanicText;
|
||||
Assert.IsTrue(EqualityHelper.ByteArrayAreEquals(result, expectedResult), "CP437 Encoding of Germanic text failed byte arrays different");
|
||||
|
||||
/*
|
||||
* From this point on a lot of characters will be replaced by 0x3F ('?') because
|
||||
* cannot be really represented on CP437
|
||||
*/
|
||||
text = "Cosmos είναι υπέροχος!";
|
||||
result = xEncoding.GetBytes(text);
|
||||
expectedResult = CP437GreekText;
|
||||
Assert.IsTrue(EqualityHelper.ByteArrayAreEquals(result, expectedResult), "CP437 Encoding of Greek text failed byte arrays different");
|
||||
|
||||
text = "Cosmos 素晴らしいです!";
|
||||
result = xEncoding.GetBytes(text);
|
||||
expectedResult = CP437JapanaseText;
|
||||
Assert.IsTrue(EqualityHelper.ByteArrayAreEquals(result, expectedResult), "CP437 Encoding of Japanese text failed byte arrays different");
|
||||
|
||||
text = "𐍈";
|
||||
result = xEncoding.GetBytes(text);
|
||||
expectedResult = CP437GothicText;
|
||||
Assert.IsTrue(EqualityHelper.ByteArrayAreEquals(result, expectedResult), "CP437 Encoding of Gothic text failed byte arrays different");
|
||||
|
||||
//string expectedText;
|
||||
/* Now we do the other way: we have a CP437 byte array and try to convert it in a UFT16 String */
|
||||
text = xEncoding.GetString(CP437EnglishText);
|
||||
expectedText = "Cosmos is wonderful!";
|
||||
Assert.IsTrue((text == expectedText), "CP437 Decoding of English text failed strings different");
|
||||
|
||||
text = xEncoding.GetString(CP437ItalianText);
|
||||
expectedText = "Cosmos è fantastico!";
|
||||
Assert.IsTrue((text == expectedText), "CP437 Decoding of Italian text failed strings different");
|
||||
|
||||
text = xEncoding.GetString(CP437SpanishText);
|
||||
expectedText = "Cosmos es genial!";
|
||||
Assert.IsTrue((text == expectedText), "CP437 Decoding of Spanish text failed strings different");
|
||||
|
||||
text = xEncoding.GetString(CP437GermanicText);
|
||||
expectedText = "Cosmos ist großartig!";
|
||||
Assert.IsTrue((text == expectedText), "CP437 Decoding of Germanic text failed strings different");
|
||||
|
||||
/* CP437 replaces not representable characters with '?' */
|
||||
text = xEncoding.GetString(CP437GreekText);
|
||||
expectedText = "Cosmos ε??α? ?π??????!";
|
||||
Assert.IsTrue((text == expectedText), "CP437 Decoding of Greek text failed strings different");
|
||||
|
||||
text = xEncoding.GetString(CP437JapanaseText);
|
||||
expectedText = "Cosmos ???????!";
|
||||
Assert.IsTrue((text == expectedText), "CP437 Decoding of Japanese text failed strings different");
|
||||
|
||||
text = xEncoding.GetString(CP437GothicText);
|
||||
expectedText = "??";
|
||||
Assert.IsTrue((text == expectedText), "CP437 Decoding of Gothic text failed strings different");
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -83,7 +83,6 @@ namespace Cosmos.Core_Plugs.System
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
public static unsafe void Ctor(
|
||||
string aThis,
|
||||
char aChar,
|
||||
|
|
@ -100,6 +99,19 @@ namespace Cosmos.Core_Plugs.System
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* These 2 unsafe string Ctor are only "stubs" implemented because Encoding needed them existing but our implementation is not
|
||||
* using them. Sincerely in Cosmos I hope there wi
|
||||
*/
|
||||
/// <summary>
/// Stub plug for the string constructor that takes a pointer to sbyte characters.
/// It is not implemented: calling it always throws.
/// </summary>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public static unsafe void Ctor(string aThis, sbyte* aValue)
{
    string xReason = "String Ctor(sbyte * '\0' terminated)";

    throw new NotImplementedException(xReason);
}
|
||||
|
||||
/// <summary>
/// Stub plug for the string constructor that takes a pointer to sbyte characters plus a
/// start index and a length. It is not implemented: calling it always throws.
/// </summary>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public static unsafe void Ctor(string aThis, sbyte* aValue, int aStartIndex, int aLength)
{
    string xReason = "String Ctor(sbyte * with lenght)";

    throw new NotImplementedException(xReason);
}
|
||||
|
||||
public static unsafe int get_Length(
|
||||
[ObjectPointerAccess] uint* aThis,
|
||||
|
|
|
|||
38
source/Cosmos.System2/Text/CP437Encoding.cs
Normal file
38
source/Cosmos.System2/Text/CP437Encoding.cs
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
//#define COSMOSDEBUG
|
||||
using Cosmos.Debug.Kernel;
|
||||
|
||||
namespace Cosmos.System.ExtendedASCII
{
    /// <summary>
    /// Code page 437 (the original IBM PC / DOS character set). The constructor installs the
    /// 128 characters of the upper half of the code page into <c>CodePageTable</c>; the
    /// actual conversion logic lives in <see cref="SingleByteEncoding"/>.
    /// </summary>
    internal class CP437Enconding : SingleByteEncoding
    {
        private static Debugger sDebugger = new Debugger("System", "CP437 Encoding");

        /// <summary>Creates the encoding and fills its code page table.</summary>
        internal CP437Enconding()
        {
            sDebugger.SendInternal("CP437Enconding Setting CodePageTable only one time...");

            /* One row per 16 code points; entry 0 of the table is byte 0x80. */
            char[] xUpperHalf =
            {
                /* 0x80 */ 'Ç', 'ü', 'é', 'â', 'ä', 'à', 'å', 'ç', 'ê', 'ë', 'è', 'ï', 'î', 'ì', 'Ä', 'Å',
                /* 0x90 */ 'É', 'æ', 'Æ', 'ô', 'ö', 'ò', 'û', 'ù', 'ÿ', 'Ö', 'Ü', '¢', '£', '¥', '₧', 'ƒ',
                /* 0xA0 */ 'á', 'í', 'ó', 'ú', 'ñ', 'Ñ', 'ª', 'º', '¿', '⌐', '¬', '½', '¼', '¡', '«', '»',
                /* 0xB0 */ '░', '▒', '▓', '│', '┤', '╡', '╢', '╖', '╕', '╣', '║', '╗', '╝', '╜', '╛', '┐',
                /* 0xC0 */ '└', '┴', '┬', '├', '─', '┼', '╞', '╟', '╚', '╔', '╩', '╦', '╠', '═', '╬', '╧',
                /* 0xD0 */ '╨', '╤', '╥', '╙', '╘', '╒', '╓', '╫', '╪', '┘', '┌', '█', '▄', '▌', '▐', '▀',
                /* 0xE0 */ 'α', 'ß', 'Γ', 'π', 'Σ', 'σ', 'µ', 'τ', 'Φ', 'Θ', 'Ω', 'δ', '∞', 'φ', 'ε', '∩',
                /* 0xF0 */ '≡', '±', '≥', '≤', '⌠', '⌡', '÷', '≈', '°', '∙', '·', '√', 'ⁿ', '²', '■', '\u00A0'
            };

            CodePageTable = xUpperHalf;
        }

        /// <summary>Name of this encoding: "IBM437".</summary>
        public override string BodyName
        {
            get { return "IBM437"; }
        }

        /// <summary>Code page identifier of this encoding: 437.</summary>
        public override int CodePage
        {
            get { return 437; }
        }
    }
}
|
||||
31
source/Cosmos.System2/Text/CP858Encoding.cs
Normal file
31
source/Cosmos.System2/Text/CP858Encoding.cs
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
//#define COSMOSDEBUG
|
||||
using Cosmos.Debug.Kernel;
|
||||
|
||||
namespace Cosmos.System.ExtendedASCII
{
    /// <summary>
    /// Code page 858 (DOS Latin 1 with the Euro sign at 0xD5). The constructor installs the
    /// 128 characters of the upper half of the code page into <c>CodePageTable</c>; the
    /// actual conversion logic lives in <see cref="SingleByteEncoding"/>.
    /// </summary>
    internal class CP858Enconding : SingleByteEncoding
    {
        private static Debugger myDebugger = new Debugger("System", "CP858 Encoding");

        /// <summary>Creates the encoding and fills its code page table.</summary>
        internal CP858Enconding()
        {
            myDebugger.SendInternal($"CP858Enconding Setting CodePageTable only one time...");

            /* One row per 16 code points; entry 0 of the table is byte 0x80. */
            CodePageTable = new char[]
            {
                'Ç', 'ü', 'é', 'â', 'ä', 'à', 'å', 'ç', 'ê', 'ë', 'è', 'ï', 'î', 'ì', 'Ä', 'Å',
                'É', 'æ', 'Æ', 'ô', 'ö', 'ò', 'û', 'ù', 'ÿ', 'Ö', 'Ü', 'ø', '£', 'Ø', '×', 'ƒ',
                'á', 'í', 'ó', 'ú', 'ñ', 'Ñ', 'ª', 'º', '¿', '®', '¬', '½', '¼', '¡', '«', '»',
                '░', '▒', '▓', '│', '┤', 'Á', 'Â', 'À', '©', '╣', '║', '╗', '╝', '¢', '¥', '┐',
                '└', '┴', '┬', '├', '─', '┼', 'ã', 'Ã', '╚', '╔', '╩', '╦', '╠', '═', '╬', '¤',
                'ð', 'Ð', 'Ê', 'Ë', 'È', '€', 'Í', 'Î', 'Ï', '┘', '┌', '█', '▄', '¦', 'Ì', '▀',
                'Ó', 'ß', 'Ô', 'Ò', 'õ', 'Õ', 'µ', 'þ', 'Þ', 'Ú', 'Û', 'Ù', 'ý', 'Ý', '¯', '´',
                '\u00AD', '±', '‗', '¾', '¶', '§', '÷', '¸', '°', '¨', '·', '¹', '³', '²', '■', '\u00A0'
            };
        }

        /// <summary>Name of this encoding: "IBM00858".</summary>
        public override string BodyName => "IBM00858";

        /// <summary>
        /// Code page identifier of this encoding: 858. (It used to report 437, a copy-paste
        /// leftover from the CP437 class, which made the two encodings indistinguishable by
        /// code page.)
        /// </summary>
        public override int CodePage => 858;
    }
}
|
||||
39
source/Cosmos.System2/Text/CosmosEncodingProvider.cs
Normal file
39
source/Cosmos.System2/Text/CosmosEncodingProvider.cs
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
#define COSMOSDEBUG
|
||||
using System.Text;
|
||||
using Cosmos.Debug.Kernel;
|
||||
|
||||
namespace Cosmos.System.ExtendedASCII
{
    /// <summary>
    /// <see cref="EncodingProvider"/> that resolves encodings through <c>EncodingTable</c>.
    /// Register the shared <see cref="Instance"/> with <c>Encoding.RegisterProvider</c>.
    /// </summary>
    public class CosmosEncodingProvider : EncodingProvider
    {
        private static readonly EncodingProvider s_singleton = new CosmosEncodingProvider();
        private static Debugger myDebugger = new Debugger("System", "CosmosEncodingProvider");

        internal CosmosEncodingProvider()
        {
        }

        /// <summary>The single shared provider instance.</summary>
        public static EncodingProvider Instance => s_singleton;

        /// <summary>
        /// Looks up an encoding by code page number.
        /// </summary>
        /// <param name="codepage">Code page identifier.</param>
        /// <returns>
        /// <c>null</c> when <paramref name="codepage"/> is outside 0..65535; otherwise whatever
        /// <c>EncodingTable.GetEncoding</c> returns for it.
        /// </returns>
        public override Encoding GetEncoding(int codepage)
        {
            myDebugger.SendInternal($"Getting Encoding for codepage {codepage}");

            bool xIsValidCodepage = codepage >= 0 && codepage <= 65535;
            if (!xIsValidCodepage)
            {
                return null;
            }

            /* The actual lookup is delegated to our EncodingTable */
            return EncodingTable.GetEncoding(codepage);
        }

        /// <summary>
        /// Looks up an encoding by name.
        /// </summary>
        /// <param name="name">Name to resolve through <c>EncodingTable.GetCodePageFromDesc</c>.</param>
        /// <returns>
        /// <c>null</c> when the name is not known to the table; otherwise the result of
        /// <see cref="GetEncoding(int)"/> for the matching code page.
        /// </returns>
        public override Encoding GetEncoding(string name)
        {
            myDebugger.SendInternal($"Getting Encoding for codepage with name {name}");

            int xCodepage = EncodingTable.GetCodePageFromDesc(name);

            return xCodepage != -1 ? GetEncoding(xCodepage) : null;
        }
    }
}
|
||||
60
source/Cosmos.System2/Text/EncodingTable.cs
Normal file
60
source/Cosmos.System2/Text/EncodingTable.cs
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
#define COSMOSDEBUG
|
||||
using System.Text;
|
||||
using Cosmos.Debug.Kernel;
|
||||
|
||||
namespace Cosmos.System.ExtendedASCII
{
    /// <summary>
    /// Registry of the code pages Cosmos knows about, indexed directly by code page number.
    /// Ideally this would be a Dictionary or Hashtable, but those are not yet working in
    /// Cosmos, so a fixed-size array is used as a replacement for now.
    /// </summary>
    internal static class EncodingTable
    {
        /* The tag used to say "CP437 Encoding", a copy-paste leftover from another class. */
        private static Debugger myDebugger = new Debugger("System", "EncodingTable");

        /// <summary>Number of slots in the table; code pages at or above this cannot be stored.</summary>
        const int MaxCodepageChacheSize = 2048;

        static values[] CodepageCache = new values[MaxCodepageChacheSize];

        /// <summary>One table slot: the name of a code page and its encoding instance.</summary>
        private struct values
        {
            public string desc;
            public Encoding encoding;

            public values(string desc, Encoding encoding)
            {
                this.desc = desc;
                this.encoding = encoding;
            }
        };

        static EncodingTable()
        {
            myDebugger.SendInternal("Inizializing Encoding Table");

            Add(437, "IBM437", new CP437Enconding());
            /*
             * Registered under the same name that CP858Enconding.BodyName reports; it used to
             * be "IBM0858", so a lookup by the encoding's own BodyName could not succeed.
             */
            Add(858, "IBM00858", new CP858Enconding());
        }

        /// <summary>True when <paramref name="codepage"/> is a valid index into the table.</summary>
        private static bool IsInRange(int codepage)
        {
            return codepage >= 0 && codepage < MaxCodepageChacheSize;
        }

        /// <summary>
        /// Registers <paramref name="encoding"/> under <paramref name="codepage"/> and
        /// <paramref name="desc"/>, replacing any previous entry for that code page.
        /// </summary>
        /// <exception cref="global::System.ArgumentOutOfRangeException">
        /// <paramref name="codepage"/> does not fit in the table.
        /// </exception>
        public static void Add(int codepage, string desc, Encoding encoding)
        {
            myDebugger.SendInternal($"Adding codepage {codepage} desc {desc}");

            /* 'System' alone would bind to Cosmos.System inside this namespace, hence global:: */
            if (!IsInRange(codepage))
            {
                throw new global::System.ArgumentOutOfRangeException(nameof(codepage));
            }

            CodepageCache[codepage] = new values(desc, encoding);
        }

        /// <summary>
        /// Returns the name registered for <paramref name="codepage"/>, or <c>null</c> when
        /// nothing is registered or the code page does not fit in the table.
        /// </summary>
        public static string GetDescription(int codepage)
        {
            return IsInRange(codepage) ? CodepageCache[codepage].desc : null;
        }

        /// <summary>
        /// Returns the encoding registered for <paramref name="codepage"/>, or <c>null</c> when
        /// nothing is registered or the code page does not fit in the table. The range check
        /// matters because CosmosEncodingProvider forwards any value up to 65535, which used
        /// to index past the end of the array.
        /// </summary>
        public static Encoding GetEncoding(int codepage)
        {
            return IsInRange(codepage) ? CodepageCache[codepage].encoding : null;
        }

        /// <summary>
        /// Finds the code page registered under <paramref name="desc"/> (exact,
        /// case-sensitive match).
        /// </summary>
        /// <returns>The code page number, or -1 when the name is null or not registered.</returns>
        public static int GetCodePageFromDesc(string desc)
        {
            /* A null name would otherwise match the first empty slot and return code page 0. */
            if (desc == null)
            {
                return -1;
            }

            for (int idx = 0; idx < MaxCodepageChacheSize; idx++)
            {
                if (CodepageCache[idx].desc == desc)
                {
                    return idx;
                }
            }

            /* Not found! */
            return -1;
        }
    }
}
|
||||
187
source/Cosmos.System2/Text/SingleByteEncoding.cs
Normal file
187
source/Cosmos.System2/Text/SingleByteEncoding.cs
Normal file
|
|
@ -0,0 +1,187 @@
|
|||
//#define COSMOSDEBUG
|
||||
using System;
|
||||
using System.Text;
|
||||
using Cosmos.Debug.Kernel;
|
||||
|
||||
namespace Cosmos.System.ExtendedASCII
{
    /// <summary>
    /// Base class for single-byte ("extended ASCII") code pages. Bytes 0x00-0x7F are treated
    /// as ASCII and map to the same UTF-16 code unit; bytes 0x80-0xFF are looked up in
    /// <see cref="CodePageTable"/>, whose entry 0 corresponds to byte 0x80. Characters that
    /// are neither ASCII nor in the table are encoded as '?'.
    /// </summary>
    /// <remarks>
    /// Derived classes must assign <see cref="CodePageTable"/> (128 entries are expected)
    /// before any conversion is attempted; this class does not check that itself.
    /// </remarks>
    internal class SingleByteEncoding : Encoding
    {
        private static Debugger mDebugger = new Debugger("System", "SingleByteEncoding");

        /// <summary>Byte emitted for characters the code page cannot represent.</summary>
        private const byte ReplacementChar = (byte)'?';

        /// <summary>First value that is no longer plain ASCII; also the byte of table entry 0.</summary>
        private const int FirstExtendedValue = 128;

        /// <summary>Characters for bytes 0x80 and above, in byte order.</summary>
        internal char[] CodePageTable { get; set; }

        /// <summary>Always <c>true</c>: every character is encoded as exactly one byte.</summary>
        public override bool IsSingleByte => true;

        /// <summary>
        /// Returns the number of bytes needed to encode <paramref name="count"/> chars of
        /// <paramref name="chars"/> starting at <paramref name="index"/>. Since every UTF-16
        /// code unit becomes exactly one byte, that is <paramref name="count"/>.
        /// </summary>
        /// <exception cref="ArgumentNullException"><paramref name="chars"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="index"/> or <paramref name="count"/> is negative, or the range
        /// runs past the end of <paramref name="chars"/>.
        /// </exception>
        public override int GetByteCount(char[] chars, int index, int count)
        {
            if (chars == null)
            {
                throw new ArgumentNullException("chars", "Null Array");
            }

            if (index < 0 || count < 0)
            {
                throw new ArgumentOutOfRangeException((index < 0 ? "index" : "count"), "negative number");
            }

            if (chars.Length - index < count)
            {
                throw new ArgumentOutOfRangeException("chars", "count more that what is in array");
            }

            /* Logged only after validation, so the message never formats a null array */
            mDebugger.SendInternal($"GetByteCount of chars {new string(chars)} index {index} count {count}");

            /* One byte per char; this used to return 'count - index', wrong for index != 0 */
            return count;
        }

        /// <summary>
        /// Finds the byte value (128-255) that <paramref name="ch"/> has in the code page.
        /// </summary>
        /// <returns>The byte value, or -1 when the character is not in the table.</returns>
        private int GetCodePageIdxFromChr(char ch)
        {
            /* IL2CPU bug again with interfaces :-( so no Array.IndexOf, search manually... */
            for (int idx = 0; idx < CodePageTable.Length; idx++)
            {
                if (CodePageTable[idx] == ch)
                {
                    return idx + FirstExtendedValue;
                }
            }

            /* All CodePageTable searched, nothing found! */
            return -1;
        }

        /// <summary>
        /// Encodes one UTF-16 code unit: ASCII is cast, table characters get their code page
        /// byte, everything else (including each half of a surrogate pair) becomes '?'.
        /// </summary>
        private byte GetByte(char ch)
        {
            /* ASCII is 0..127 inclusive; the old '< 127' test turned DEL (0x7F) into '?' */
            if (ch < FirstExtendedValue)
            {
                mDebugger.SendInternal($"ch {ch} is ASCII");
                return (byte)ch;
            }

            mDebugger.SendInternal($"ch {ch} could be Extended Ascii");
            int idx = GetCodePageIdxFromChr(ch);
            if (idx == -1)
            {
                mDebugger.SendInternal($"ch {ch} not in CodePageTable replaced with {(char)ReplacementChar}");
                return ReplacementChar;
            }

            mDebugger.SendInternal($"ch {ch} is CodePageTable {idx}");
            return (byte)idx;
        }

        /// <summary>
        /// Encodes <paramref name="charCount"/> chars of <paramref name="chars"/>, starting at
        /// <paramref name="charIndex"/>, into <paramref name="bytes"/> starting at
        /// <paramref name="byteIndex"/>.
        /// </summary>
        /// <returns>The number of bytes written, which equals <paramref name="charCount"/>.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="chars"/> or <paramref name="bytes"/> is null.
        /// </exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// An index or count is negative or outside its array.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="bytes"/> has too little room after <paramref name="byteIndex"/>.
        /// </exception>
        public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)
        {
            if (chars == null)
            {
                throw new ArgumentNullException("chars", "Null Array");
            }

            if (bytes == null)
            {
                throw new ArgumentNullException("bytes", "Null Array");
            }

            if (charIndex < 0 || charCount < 0)
            {
                throw new ArgumentOutOfRangeException((charIndex < 0 ? "charIndex" : "charCount"), "negative number");
            }

            if (chars.Length - charIndex < charCount)
            {
                throw new ArgumentOutOfRangeException("chars", "count more that what is in array");
            }

            if (byteIndex < 0 || byteIndex > bytes.Length)
            {
                throw new ArgumentOutOfRangeException("byteIndex", "index out of range");
            }

            if (bytes.Length - byteIndex < charCount)
            {
                throw new ArgumentException("output array too small", "bytes");
            }

            mDebugger.SendInternal($"GetBytes of chars {new string(chars)} index {charIndex} count {charCount}");
            mDebugger.SendInternal($"Converting to CodePageTable: {new String(chars)}");

            /*
             * 'i' counts converted items and is offset separately into each array. The old
             * loop ran i from charIndex to charCount and used it for both arrays, which
             * converted the wrong range whenever charIndex was not 0.
             */
            for (int i = 0; i < charCount; i++)
            {
                bytes[byteIndex + i] = GetByte(chars[charIndex + i]);
            }

            mDebugger.SendInternal($"So as bytes we have {BitConverter.ToString(bytes)}");

            /* Number of bytes written, not the size of the destination buffer */
            return charCount;
        }

        /// <summary>
        /// Returns the number of chars produced by decoding <paramref name="count"/> bytes of
        /// <paramref name="bytes"/> starting at <paramref name="index"/>. Since every byte
        /// becomes exactly one char, that is <paramref name="count"/>.
        /// </summary>
        /// <exception cref="ArgumentNullException"><paramref name="bytes"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="index"/> or <paramref name="count"/> is negative, or the range
        /// runs past the end of <paramref name="bytes"/>.
        /// </exception>
        public override int GetCharCount(byte[] bytes, int index, int count)
        {
            if (bytes == null)
            {
                throw new ArgumentNullException("bytes", "Null Array");
            }

            if (index < 0 || count < 0)
            {
                throw new ArgumentOutOfRangeException((index < 0 ? "index" : "count"), "negative number");
            }

            if (bytes.Length - index < count)
            {
                throw new ArgumentOutOfRangeException("bytes", "count more that what is in array");
            }

            mDebugger.SendInternal($"GetCharCount of bytes {BitConverter.ToString(bytes)} index {index} count {count}");

            /* One char per byte; this used to return 'count - index', wrong for index != 0 */
            return count;
        }

        /// <summary>
        /// Decodes one byte: ASCII is cast, anything from 0x80 up is read from the table.
        /// </summary>
        private char GetChar(byte b)
        {
            mDebugger.SendInternal($"Converting to UTF16: {b}...");

            /*
             * ASCII is 0..127 inclusive. The old '< 127' test sent 0x7F to the table lookup
             * below, i.e. CodePageTable[-1].
             */
            if (b < FirstExtendedValue)
            {
                mDebugger.SendInternal($"b {b} is ASCII");
                return (char)b;
            }

            mDebugger.SendInternal($"b in Extended ASCII");
            return CodePageTable[b - FirstExtendedValue];
        }

        /// <summary>
        /// Decodes <paramref name="byteCount"/> bytes of <paramref name="bytes"/>, starting at
        /// <paramref name="byteIndex"/>, into <paramref name="chars"/> starting at
        /// <paramref name="charIndex"/>.
        /// </summary>
        /// <returns>The number of chars written, which equals <paramref name="byteCount"/>.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="bytes"/> or <paramref name="chars"/> is null.
        /// </exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// An index or count is negative or outside its array.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="chars"/> has too little room after <paramref name="charIndex"/>.
        /// </exception>
        public override int GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex)
        {
            if (bytes == null)
            {
                throw new ArgumentNullException("bytes", "Null Array");
            }

            if (chars == null)
            {
                throw new ArgumentNullException("chars", "Null Array");
            }

            if (byteIndex < 0 || byteCount < 0)
            {
                throw new ArgumentOutOfRangeException((byteIndex < 0 ? "byteIndex" : "byteCount"), "negative number");
            }

            if (bytes.Length - byteIndex < byteCount)
            {
                throw new ArgumentOutOfRangeException("bytes", "count more that what is in array");
            }

            if (charIndex < 0 || charIndex > chars.Length)
            {
                throw new ArgumentOutOfRangeException("charIndex", "index out of range");
            }

            if (chars.Length - charIndex < byteCount)
            {
                throw new ArgumentException("output array too small", "chars");
            }

            mDebugger.SendInternal($"Converting to UTF16: {BitConverter.ToString(bytes)}...");

            /* Same indexing fix as in GetBytes: offset each array by its own start index */
            for (int i = 0; i < byteCount; i++)
            {
                chars[charIndex + i] = GetChar(bytes[byteIndex + i]);
            }

            mDebugger.SendInternal($"So as chars we have {new String(chars)}");

            /* Number of chars written, not the size of the destination buffer */
            return byteCount;
        }

        /// <summary>
        /// Upper bound on the bytes needed to encode <paramref name="charCount"/> chars.
        /// </summary>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="charCount"/> is negative.</exception>
        public override int GetMaxByteCount(int charCount)
        {
            if (charCount < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(charCount), "negative number");
            }

            /* Number of characters + 1, in case a high surrogate is pending */
            return charCount + 1;
        }

        /// <summary>
        /// Upper bound on the chars produced by decoding <paramref name="byteCount"/> bytes;
        /// a single-byte code page never produces surrogates, so the length stays the same.
        /// </summary>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="byteCount"/> is negative.</exception>
        public override int GetMaxCharCount(int byteCount)
        {
            if (byteCount < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(byteCount), "negative number");
            }

            return byteCount;
        }
    }
}
|
||||
|
|
@ -0,0 +1,31 @@
|
|||
using Cosmos.Debug.Kernel;
|
||||
using Cosmos.System2_Plugs.System.Text;
|
||||
using IL2CPU.API.Attribs;
|
||||
using System;
|
||||
using System.Collections;
|
||||
using System.Text;
|
||||
|
||||
namespace Cosmos.System2_Plugs.System.Globalization
{
    /// <summary>
    /// Plug for <c>System.Globalization.EncodingTable</c>. In .NET Core that table is
    /// practically empty, but it instantiates a Dictionary, which Cosmos does not support
    /// yet; once it does, this plug will probably not be needed anymore.
    /// </summary>
    [Plug(TargetName = "System.Globalization.EncodingTable")]
    public static class EncodingTableImpl
    {
        /// <summary>Replacement static constructor: intentionally does nothing.</summary>
        public static void Cctor()
        {
            /* Nothing to initialize */
        }

        /// <summary>Always returns <c>null</c>, whatever <paramref name="codepage"/> is.</summary>
        public static object GetCodePageDataItem(int codepage) => null;

        /// <summary>Always returns -1, whatever <paramref name="name"/> is.</summary>
        public static int GetCodePageFromName(string name) => -1;
    }
}
|
||||
Loading…
Reference in a new issue