using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Text;

namespace Cosmos.Compiler.XSharp {
  /// <summary>
  /// Translates tokenized XSharp source lines into output lines by matching them
  /// against a table of token patterns registered in <see cref="AddPatterns"/>.
  /// Each pattern carries a code generator that appends one or more lines to a
  /// result list; the lines are then passed through <c>string.Format</c> with the
  /// token values as arguments, so <c>{0}</c>, <c>{1}</c>, ... in a generated line
  /// refer to the tokens of the matched source line.
  /// </summary>
  public class TokenPatterns {
    /// <summary>One registered pattern and the generator that emits code for it.</summary>
    protected class Pattern {
      // Tokens of the pattern text, parsed in pattern mode.
      public TokenList Tokens;
      // Pattern hash of Tokens; used by FindMatch to narrow the candidates quickly.
      public int Hash;
      // Generator invoked with the tokens of the matched source line.
      public CodeFunc Code;
      // When false, GetPatternCode wraps every emitted line in
      // new LiteralAssemblerCode("..."); when true the lines are emitted as-is.
      public bool OldCodeType;
    }

    /// <summary>When true, "# Comment" lines are emitted as "; ..." comment lines.</summary>
    public bool EmitUserComments = true;

    /// <summary>Generator signature: appends output lines for <paramref name="aTokens"/> to <paramref name="rCode"/>.</summary>
    public delegate void CodeFunc(TokenList aTokens, ref List<string> rCode);

    protected List<Pattern> mPatterns = new List<Pattern>();
    // Current group name, set by the "Group _ABC" pattern.
    protected string mGroup;
    // Current procedure name; null while outside a procedure / interrupt handler.
    protected string mProcedureName = null;
    // True while inside an "InterruptHandler" body, so "}" emits IRET instead of Return.
    protected bool mInIntHandler;

    public TokenPatterns() {
      AddPatterns();
    }

    /// <summary>Wraps <paramref name="aString"/> in double quotes (no escaping is performed).</summary>
    protected string Quoted(string aString) {
      return "\"" + aString + "\"";
    }

    /// <summary>
    /// Doubles '{' and '}' so that free-form text survives the string.Format pass
    /// that GetPatternCode applies to every generated line.
    /// </summary>
    private static string EscapeFormatBraces(string aText) {
      return aText.Replace("{", "{{").Replace("}", "}}");
    }

    /// <summary>
    /// Parses the value of a numeric token. A "0x" / "0X" prefix selects hexadecimal,
    /// anything else is parsed as a decimal integer. Parsing is culture-invariant.
    /// </summary>
    /// <exception cref="FormatException">The token value is not a valid number.</exception>
    /// <exception cref="OverflowException">The number does not fit in an Int32.</exception>
    protected int IntValue(Token aToken) {
      string xValue = aToken.Value;
      if (xValue.StartsWith("0x", StringComparison.OrdinalIgnoreCase)) {
        return int.Parse(xValue.Substring(2), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture);
      }
      return int.Parse(xValue, CultureInfo.InvariantCulture);
    }

    /// <summary>Group-level label that refers to token <paramref name="aIdx"/> through a format placeholder.</summary>
    protected string GroupLabel(int aIdx) {
      return GroupLabel("{" + aIdx + "}");
    }

    /// <summary>Builds a group-level label: Group_Label.</summary>
    protected string GroupLabel(string aLabel) {
      return mGroup + "_" + aLabel;
    }

    /// <summary>Procedure-level label that refers to token <paramref name="aIdx"/> through a format placeholder.</summary>
    protected string ProcLabel(int aIdx) {
      return ProcLabel("{" + aIdx + "}");
    }

    /// <summary>Builds a procedure-level label: Group_ProcName_Label.</summary>
    protected string ProcLabel(string aLabel) {
      return mGroup + "_" + mProcedureName + "_" + aLabel;
    }

    /// <summary>
    /// Resolves a label token to its emitted name. Leading dots move up one scope:
    /// inside a procedure "Name" is procedure level, ".Name" group level and
    /// "..Name" is emitted verbatim; outside a procedure "Name" is group level and
    /// ".Name" is emitted verbatim.
    /// </summary>
    /// <exception cref="Exception">The token is not of type AlphaNum.</exception>
    protected string GetLabel(Token aToken) {
      if (aToken.Type != TokenType.AlphaNum) {
        throw new Exception("Label must be AlphaNum.");
      }

      string xValue = aToken.Value;
      if (mProcedureName == null) {
        if (xValue.StartsWith(".", StringComparison.Ordinal)) {
          return xValue.Substring(1);
        }
        return GroupLabel(xValue);
      }

      if (xValue.StartsWith("..", StringComparison.Ordinal)) {
        return xValue.Substring(2);
      }
      if (xValue.StartsWith(".", StringComparison.Ordinal)) {
        return GroupLabel(xValue.Substring(1));
      }
      return ProcLabel(xValue);
    }

    /// <summary>Initializer fragment for a destination register operand starting at token <paramref name="aIdx"/>.</summary>
    protected string GetDestRegister(TokenList aTokens, int aIdx) {
      return GetRegister("Destination", aTokens, aIdx);
    }

    /// <summary>Initializer fragment for a source register operand starting at token <paramref name="aIdx"/>.</summary>
    protected string GetSrcRegister(TokenList aTokens, int aIdx) {
      return GetRegister("Source", aTokens, aIdx);
    }

    /// <summary>
    /// Builds the object-initializer fragment for a register operand, e.g.
    /// "SourceReg = RegistersEnum.EAX". When the register token is followed by "["
    /// the operand is indirect and "IsIndirect" / "Displacement" members are appended.
    /// </summary>
    /// <param name="aPrefix">Member-name prefix ("Destination" or "Source").</param>
    /// <param name="aTokens">Tokens of the source line.</param>
    /// <param name="aIdx">Index of the register token.</param>
    protected string GetRegister(string aPrefix, TokenList aTokens, int aIdx) {
      Token xNext = null;
      if (aIdx + 1 < aTokens.Count) {
        xNext = aTokens[aIdx + 1];
      }

      string xResult = aPrefix + "Reg = RegistersEnum." + aTokens[aIdx].Value;
      if (xNext != null && xNext.Value == "[") {
        string xDisplacement;
        if (aTokens[aIdx + 2].Value == "-") {
          // The sign is a token of its own; the magnitude is the token after it.
          // (Previously the "-" token itself was appended, yielding "--".)
          xDisplacement = "-" + aTokens[aIdx + 3].Value;
        } else {
          xDisplacement = aTokens[aIdx + 2].Value;
        }
        xResult = xResult + ", " + aPrefix + "IsIndirect = true, " + aPrefix + "Displacement = " + xDisplacement;
      }
      return xResult;
    }

    /// <summary>Maps a comparison token ("&lt;", "&gt;", "=", "0", "!=", "&lt;=", "&gt;=") to its ConditionalTestEnum member name.</summary>
    /// <exception cref="Exception">The token is not a recognized comparison symbol.</exception>
    protected string GetCondition(Token aToken) {
      switch (aToken.Value) {
        case "<":
          return "ConditionalTestEnum.LessThan";
        case ">":
          return "ConditionalTestEnum.GreaterThan";
        case "=":
        case "0":
          return "ConditionalTestEnum.Zero";
        case "!=":
          return "ConditionalTestEnum.NotZero";
        case "<=":
          return "ConditionalTestEnum.BelowOrEqual";
        case ">=":
          return "ConditionalTestEnum.AboveOrEqual";
        default:
          throw new Exception("Unrecognized symbol in conditional: " + aToken.Value);
      }
    }

    /// <summary>
    /// Registers every supported pattern. Patterns registered with a leading
    /// <c>true</c> emit complete statements; the others emit text that
    /// GetPatternCode wraps in <c>new LiteralAssemblerCode("...")</c>.
    /// Generated lines go through string.Format, so literal braces are written
    /// doubled ("{{" / "}}") and "{n}" refers to token n of the source line.
    /// </summary>
    protected void AddPatterns() {
      AddPattern("! Move EAX, 0", "{0}");

      AddPattern("# Comment", delegate(TokenList aTokens, ref List<string> rCode) {
        if (EmitUserComments) {
          string xValue = aTokens[0].Value;
          xValue = xValue.Replace("\"", "\\\"");
          // Braces in the comment text must not be interpreted by string.Format.
          rCode.Add("; " + EscapeFormatBraces(xValue));
        }
      });

      // Labels
      // Local and proc level are used most, so designed to make their syntax shortest.
      // Think of the dots like a directory, . is current group, .. is above that.
      // ..Name: - Global level. Emitted exactly as is.
      // .Name: - Group level. Group_Name
      // Name: - Procedure level. Group_ProcName_Name
      AddPattern("_ABC:", delegate(TokenList aTokens, ref List<string> rCode) {
        rCode.Add(GetLabel(aTokens[0]) + ":");
      });

      AddPattern("Call _ABC", delegate(TokenList aTokens, ref List<string> rCode) {
        rCode.Add("Call " + GetLabel(aTokens[1]));
      });

      AddPattern("Goto _ABC", delegate(TokenList aTokens, ref List<string> rCode) {
        // Unconditional jump. ("Jp" is the x86 jump-if-parity instruction.)
        rCode.Add("Jmp " + GetLabel(aTokens[1]));
      });

      AddPattern(true, "var _ABC", delegate(TokenList aTokens, ref List<string> rCode) {
        rCode.Add("mAssembler.DataMembers.Add(new DataMember(" + Quoted(GetLabel(aTokens[1])) + ", 0));");
      });
      AddPattern(true, "var _ABC = 123", delegate(TokenList aTokens, ref List<string> rCode) {
        rCode.Add("mAssembler.DataMembers.Add(new DataMember(" + Quoted(GetLabel(aTokens[1])) + ", " + aTokens[3].Value + "));");
      });
      AddPattern(true, "var _ABC = 'Text'", delegate(TokenList aTokens, ref List<string> rCode) {
        // Braces in the text must not be interpreted by string.Format.
        rCode.Add("mAssembler.DataMembers.Add(new DataMember(" + Quoted(GetLabel(aTokens[1])) + ", \"" + EscapeFormatBraces(aTokens[3].Value) + "\"));");
      });
      AddPattern(true, "var _ABC _ABC[123]", delegate(TokenList aTokens, ref List<string> rCode) {
        rCode.Add("mAssembler.DataMembers.Add(new DataMember(" + Quoted(GetLabel(aTokens[1])) + ", new " + aTokens[2].Value + "[" + aTokens[4].Value + "]));");
      });

      AddPattern(true, new string[] {
          "if 0 goto _ABC",
          "if < goto _ABC",
          "if > goto _ABC",
          "if = goto _ABC",
          "if != goto _ABC",
          "if <= goto _ABC",
          "if >= goto _ABC"
        },
        delegate(TokenList aTokens, ref List<string> rCode) {
          string xLabel = GetLabel(aTokens[3]);
          var xCondition = GetCondition(aTokens[1]);
          rCode.Add("new ConditionalJump {{ Condition = " + xCondition + ", DestinationLabel = " + Quoted(xLabel) + " }};");
        }
      );
      AddPattern(true, new string[] {
          "if 0 Exit",
          "if < Exit",
          "if > Exit",
          "if = Exit",
          "if != Exit",
          "if <= Exit",
          "if >= Exit"
        },
        delegate(TokenList aTokens, ref List<string> rCode) {
          var xCondition = GetCondition(aTokens[1]);
          rCode.Add("new ConditionalJump {{ Condition = " + xCondition + ", DestinationLabel = " + Quoted(ProcLabel("Exit")) + " }};");
        }
      );

      // Must test separately since !0 is two tokens
      AddPattern(true, "if !0 goto _ABC", delegate(TokenList aTokens, ref List<string> rCode) {
        string xLabel = GetLabel(aTokens[4]);
        rCode.Add("new ConditionalJump {{ Condition = ConditionalTestEnum.NotZero, DestinationLabel = " + Quoted(xLabel) + " }};");
      });
      AddPattern(true, "if !0 Exit", delegate(TokenList aTokens, ref List<string> rCode) {
        rCode.Add("new ConditionalJump {{ Condition = ConditionalTestEnum.NotZero, DestinationLabel = " + Quoted(ProcLabel("Exit")) + " }};");
      });

      AddPattern(true, new string[] {
          //0  1   2  3    4    5
          "if _REG < 123 goto _ABC",
          "if _REG > 123 goto _ABC",
          "if _REG = 123 goto _ABC",
          "if _REG != 123 goto _ABC",
          "if _REG <= 123 goto _ABC",
          "if _REG >= 123 goto _ABC"
        },
        delegate(TokenList aTokens, ref List<string> rCode) {
          rCode.Add("new Compare {{ DestinationReg = RegistersEnum.{1}, SourceValue = {3} }};");
          var xCondition = GetCondition(aTokens[2]);
          string xLabel = GetLabel(aTokens[5]);
          rCode.Add("new ConditionalJump {{ Condition = " + xCondition + ", DestinationLabel = " + Quoted(xLabel) + " }};");
        }
      );
      AddPattern(true, new string[] {
          //0  1   2  3    4
          "if _REG < 123 Exit",
          "if _REG > 123 Exit",
          "if _REG = 123 Exit",
          "if _REG != 123 Exit",
          "if _REG <= 123 Exit",
          "if _REG >= 123 Exit"
        },
        delegate(TokenList aTokens, ref List<string> rCode) {
          rCode.Add("new Compare {{ DestinationReg = RegistersEnum.{1}, SourceValue = {3} }};");
          var xCondition = GetCondition(aTokens[2]);
          rCode.Add("new ConditionalJump {{ Condition = " + xCondition + ", DestinationLabel = " + Quoted(ProcLabel("Exit")) + " }};");
        }
      );

      AddPattern(true, "_REG ?= 123",
        "new Compare {{ DestinationReg = RegistersEnum.{0}, SourceValue = {2} }};"
      );
      AddPattern(true, "_REG ?= _ABC", delegate(TokenList aTokens, ref List<string> rCode) {
        string xLabel = GetLabel(aTokens[2]);
        rCode.Add("new Compare {{ DestinationReg = RegistersEnum.{0}, SourceIsIndirect = true, SourceRef = Cosmos.Assembler.ElementReference.New(" + Quoted(xLabel) + ") }};");
      });

      AddPattern("_REG ?& 123", "Test {0}, {2}");

      // ~ "infinite" shift because it loops
      AddPattern("_REG ~> 123", "ROR {0}, {2}");
      AddPattern("_REG <~ 123", "ROL {0}, {2}");
      AddPattern("_REG >> 123", "SHR {0}, {2}");
      AddPattern("_REG << 123", "SHL {0}, {2}");

      AddPattern("_REG = 123", "Mov dword {0}, {2}");
      AddPattern("_REG[1] = 123", "Mov dword [{0} + {2}], {5}");
      // Tokens: 0=_REG 1=[ 2=- 3=1 4=] 5== 6=123. The sign is its own token
      // (see GetRegister), so the displacement is token 3 and the value token 6.
      AddPattern("_REG[-1] = 123", "Mov dword [{0} - {3}], {6}");

      AddPattern(true, new string[] {
          "_REG = #_ABC",
          "_REG32[1] = #_ABC",
          "_REG32[-1] = #_ABC",
        },
        delegate(TokenList aTokens, ref List<string> rCode) {
          int xEqIdx = aTokens.IndexOf("=");
          string xDestReg = GetDestRegister(aTokens, 0);
          // "#" is the token right after "=", the constant name follows it.
          string xLabel = GroupLabel("Const_" + aTokens[xEqIdx + 2].Value);
          rCode.Add("new Mov {{" + xDestReg + " , SourceRef = Cosmos.Assembler.ElementReference.New(" + Quoted(xLabel) + ")" + " }};");
        }
      );
      AddPattern(true, new string[] {
          "_REG = _REG",
          "_REG = _REG32[1]",
          "_REG = _REG[-1]",
          "_REG32[1] = _REG",
          "_REG32[-1] = _REG"
        },
        delegate(TokenList aTokens, ref List<string> rCode) {
          int xEqIdx = aTokens.IndexOf("=");
          string xDestReg = GetDestRegister(aTokens, 0);
          string xSrcReg = GetSrcRegister(aTokens, xEqIdx + 1);
          rCode.Add("new Mov{{ " + xDestReg + ", " + xSrcReg + " }};");
        }
      );
      AddPattern(true, "_REG = _ABC", delegate(TokenList aTokens, ref List<string> rCode) {
        string xLabel = GetLabel(aTokens[2]);
        rCode.Add("new Mov {{"
          + " DestinationReg = RegistersEnum.{0}"
          + " , SourceRef = Cosmos.Assembler.ElementReference.New(" + Quoted(xLabel) + "), SourceIsIndirect = true"
          + " }};");
      });
      // Why not [var] like registers? Because it is less frequent to access the ptr,
      // and it is like a reg.. without [] to get the value...
      AddPattern(true, "_REG = @_ABC", delegate(TokenList aTokens, ref List<string> rCode) {
        // Tokens: 0=_REG 1== 2=@ 3=_ABC
        string xLabel = GetLabel(aTokens[3]);
        rCode.Add("new Mov {{"
          + " DestinationReg = RegistersEnum.{0}"
          + " , SourceRef = Cosmos.Assembler.ElementReference.New(" + Quoted(xLabel) + ")"
          + " }};");
      });

      AddPattern(new string[] {
          "Port[DX] = AL",
          "Port[DX] = AX",
          "Port[DX] = EAX"
        },
        "Out DX, {5}"
      );
      AddPattern(new string[] {
          "AL = Port[DX]",
          "AX = Port[DX]",
          "EAX = Port[DX]"
        },
        "In {0}, DX"
      );

      AddPattern("+123", "Push dword {1}");
      AddPattern(true, "+123:12",
        "new Push {{"
        + " DestinationValue = {1}, Size = {3} "
        + "}};"
      );
      AddPattern(true, "+_REG",
        "new Push {{"
        + " DestinationReg = RegistersEnum.{1}"
        + "}};"
      );
      AddPattern(true, "-_REG",
        "new Pop {{"
        + " DestinationReg = RegistersEnum.{1}"
        + "}};"
      );

      AddPattern(true, "_ABC = _REG", delegate(TokenList aTokens, ref List<string> rCode) {
        string xLabel = GetLabel(aTokens[0]);
        rCode.Add("new Mov {{"
          + " DestinationRef = Cosmos.Assembler.ElementReference.New(" + Quoted(xLabel) + "), DestinationIsIndirect = true"
          + " , SourceReg = RegistersEnum.{2}"
          + " }};");
      });
      AddPattern(true, "_ABC = 123", delegate(TokenList aTokens, ref List<string> rCode) {
        string xLabel = GetLabel(aTokens[0]);
        rCode.Add("new Mov {{"
          + " DestinationRef = Cosmos.Assembler.ElementReference.New(" + Quoted(xLabel) + "), DestinationIsIndirect = true"
          + " , SourceValue = {2}"
          + " }};");
      });

      // TODO: Allow asm to optimize these to Inc/Dec
      AddPattern(true, "_REG + 1", delegate(TokenList aTokens, ref List<string> rCode) {
        if (IntValue(aTokens[2]) == 1) {
          rCode.Add("new INC {{ DestinationReg = RegistersEnum.{0} }};");
        } else {
          rCode.Add("new Add {{ DestinationReg = RegistersEnum.{0}, SourceValue = {2} }};");
        }
      });
      AddPattern("_REG++", "Inc {0}");

      AddPattern(true, "_REG - 1", delegate(TokenList aTokens, ref List<string> rCode) {
        if (IntValue(aTokens[2]) == 1) {
          rCode.Add("new Dec {{ DestinationReg = RegistersEnum.{0} }};");
        } else {
          rCode.Add("new Sub {{ DestinationReg = RegistersEnum.{0}, SourceValue = {2} }};");
        }
      });
      AddPattern("_REG--", "Dec {0}");

      AddPattern(true, "}", delegate(TokenList aTokens, ref List<string> rCode) {
        // Closes the current procedure: emit its Exit label, then the return.
        rCode.Add("new Label(" + Quoted(ProcLabel("Exit")) + ");");
        if (mInIntHandler) {
          rCode.Add("new IRET();");
        } else {
          rCode.Add("new Return();");
        }
        mProcedureName = null;
      });

      AddPattern(true, "Group _ABC", delegate(TokenList aTokens, ref List<string> rCode) {
        mGroup = aTokens[1].Value;
      });

      AddPattern(true, "Exit", delegate(TokenList aTokens, ref List<string> rCode) {
        rCode.Add("new Jump {{ DestinationLabel = " + Quoted(ProcLabel("Exit")) + " }};");
      });

      AddPattern(true, "InterruptHandler _ABC {", delegate(TokenList aTokens, ref List<string> rCode) {
        mInIntHandler = true;
        mProcedureName = aTokens[1].Value;
        rCode.Add("new Label(\"" + mGroup + "_{1}\");");
      });

      AddPattern(true, "Jump _ABC", delegate(TokenList aTokens, ref List<string> rCode) {
        rCode.Add("new Jump {{ DestinationLabel = \"" + mGroup + "_{1}\" }};");
      });

      AddPattern("Return", "Ret");
      AddPattern("ReturnInterrupt", "IRet");
      AddPattern("PopAll", "Popad");
      AddPattern("PushAll", "Pushad");

      AddPattern(true, "Procedure _ABC {", delegate(TokenList aTokens, ref List<string> rCode) {
        mInIntHandler = false;
        mProcedureName = aTokens[1].Value;
        rCode.Add("new Label(\"" + mGroup + "_{1}\");");
      });

      AddPattern(true, "Checkpoint 'Text'", delegate(TokenList aTokens, ref List<string> rCode) {
        // This method emits a lot of ASM, but that's what we want because
        // at this point we need ASM as simple as possible and completely transparent.
        // No stack changes, no register mods, no calls, no jumps, etc.
        // TODO: Add an option on the debug project properties to turn this off.
        // Also see WriteDebugVideo in CosmosAssembler.cs
        var xPreBootLogging = true;
        if (xPreBootLogging) {
          // NOTE(review): 0xB8000 is presumably the text-mode video buffer - confirm.
          UInt32 xVideo = 0xB8000;
          // Reset 80 two-byte cells: first byte 0, second byte 0x02.
          for (UInt32 i = xVideo; i < xVideo + 80 * 2; i = i + 2) {
            rCode.Add("new LiteralAssemblerCode(" + Quoted("mov byte [0x" + i.ToString("X") + "], 0") + ");");
            rCode.Add("new LiteralAssemblerCode(" + Quoted("mov byte [0x" + (i + 1).ToString("X") + "], 0x02") + ");");
          }

          // Write each character of the text into the first byte of successive cells.
          foreach (var xChar in aTokens[1].Value) {
            rCode.Add("new LiteralAssemblerCode(" + Quoted("mov byte [0x" + xVideo.ToString("X") + "], " + (byte)xChar) + ");");
            xVideo = xVideo + 2;
          }
        }
      });
    }

    /// <summary>Returns the first registered pattern that matches <paramref name="aTokens"/>, or null when none does.</summary>
    protected Pattern FindMatch(TokenList aTokens) {
      int xHash = aTokens.GetPatternHashCode();

      // Get a list of matching hashes, but then we have to
      // search for exact pattern match because it is possible
      // to have duplicate hashes. Hashes just provide us a quick way
      // to reduce the search.
      foreach (var xPattern in mPatterns.Where(q => q.Hash == xHash)) {
        if (xPattern.Tokens.PatternMatches(aTokens)) {
          return xPattern;
        }
      }
      return null;
    }

    /// <summary>Parses <paramref name="aLine"/> and returns the code of the matching pattern, or null when no pattern matches.</summary>
    public List<string> GetPatternCode(string aLine) {
      var xParser = new Parser(aLine, false, false);
      return GetPatternCode(xParser.Tokens);
    }

    /// <summary>
    /// Runs the generator of the pattern matching <paramref name="aTokens"/> and
    /// post-processes its output.
    /// </summary>
    /// <returns>The generated lines (possibly empty), or null when no pattern matches.</returns>
    /// <exception cref="FormatException">A generated line is not a valid format string for the token values.</exception>
    public List<string> GetPatternCode(TokenList aTokens) {
      var xResult = new List<string>();
      var xPattern = FindMatch(aTokens);
      if (xPattern == null) {
        return null;
      }

      xPattern.Code(aTokens, ref xResult);

      // Apply {0} etc into string
      object[] xArgs = aTokens.Select(c => c.Value).ToArray();
      for (int i = 0; i < xResult.Count; i++) {
        xResult[i] = string.Format(xResult[i], xArgs);
      }

      if (xPattern.OldCodeType == false) {
        for (int i = 0; i < xResult.Count; i++) {
          xResult[i] = "new LiteralAssemblerCode(\"" + xResult[i] + "\");";
        }
      }
      return xResult;
    }

    /// <summary>
    /// Handles lines that are rewritten into other source lines before pattern
    /// matching. Currently only "Name()" is supported, rewritten to "Call .Name".
    /// </summary>
    /// <returns>The generated lines, or null when the line is not handled or produced nothing.</returns>
    public List<string> GetNonPatternCode(TokenList aTokens) {
      List<string> xCode = new List<string>();
      var xResult = new List<string>();

      // () could be handled by pattern, but best to keep in one place for future
      if (aTokens.Count == 2
        && aTokens[0].Type == TokenType.AlphaNum
        && aTokens[1].Value == "()") {
        xCode.Add("Call ." + aTokens[0].Value);
      }

      foreach (var x in xCode) {
        var xLines = GetPatternCode(x);
        if (xLines == null) {
          return null;
        }
        xResult.AddRange(xLines);
      }

      if (xResult.Count == 0) {
        return null;
      }
      return xResult;
    }

    /// <summary>
    /// Translates one source line: tries the pattern table first and falls back
    /// to <see cref="GetNonPatternCode"/>.
    /// </summary>
    /// <returns>The generated lines, or null when the line is not recognized.</returns>
    public List<string> GetCode(string aLine) {
      var xParser = new Parser(aLine, false, false);
      var xTokens = xParser.Tokens;

      var xResult = GetPatternCode(xTokens);
      if (xResult != null) {
        return xResult;
      }
      return GetNonPatternCode(xTokens);
    }

    /// <summary>Registers several patterns that share one generator (output wrapped in LiteralAssemblerCode).</summary>
    protected void AddPattern(string[] aPatterns, CodeFunc aCode) {
      AddPattern(false, aPatterns, aCode);
    }

    /// <summary>Registers several patterns that share one generator.</summary>
    protected void AddPattern(bool aOldCodeType, string[] aPatterns, CodeFunc aCode) {
      foreach (var xPattern in aPatterns) {
        AddPattern(aOldCodeType, xPattern, aCode);
      }
    }

    /// <summary>Registers one pattern with a generator (output wrapped in LiteralAssemblerCode).</summary>
    protected void AddPattern(string aPattern, CodeFunc aCode) {
      AddPattern(false, aPattern, aCode);
    }

    /// <summary>
    /// Parses <paramref name="aPattern"/> in pattern mode and adds it to the table.
    /// </summary>
    /// <param name="aOldCodeType">True when the generator emits complete statements that must not be wrapped.</param>
    /// <param name="aPattern">Pattern text.</param>
    /// <param name="aCode">Generator invoked for matching lines.</param>
    protected void AddPattern(bool aOldCodeType, string aPattern, CodeFunc aCode) {
      var xParser = new Parser(aPattern, false, true);
      var xPattern = new Pattern() {
        Tokens = xParser.Tokens,
        // Must be the same hash function FindMatch uses for its lookup.
        Hash = xParser.Tokens.GetPatternHashCode(),
        Code = aCode,
        OldCodeType = aOldCodeType
      };
      mPatterns.Add(xPattern);
    }

    /// <summary>Registers several patterns that all emit the fixed line <paramref name="aCode"/> (wrapped in LiteralAssemblerCode).</summary>
    protected void AddPattern(string[] aPatterns, string aCode) {
      AddPattern(false, aPatterns, aCode);
    }

    /// <summary>Registers several patterns that all emit the fixed line <paramref name="aCode"/>.</summary>
    protected void AddPattern(bool aOldCodeType, string[] aPatterns, string aCode) {
      AddPattern(aOldCodeType, aPatterns, delegate(TokenList aTokens, ref List<string> rCode) {
        rCode.Add(aCode);
      });
    }

    /// <summary>Registers one pattern that emits the fixed line <paramref name="aCode"/> (wrapped in LiteralAssemblerCode).</summary>
    protected void AddPattern(string aPattern, string aCode) {
      AddPattern(false, aPattern, aCode);
    }

    /// <summary>Registers one pattern that emits the fixed line <paramref name="aCode"/>.</summary>
    protected void AddPattern(bool aOldCodeType, string aPattern, string aCode) {
      AddPattern(aOldCodeType, aPattern, delegate(TokenList aTokens, ref List<string> rCode) {
        rCode.Add(aCode);
      });
    }
  }
}