--[[****************************************************************************
*                                                                              *
*                        PureBASIC Language Definition                         *
*                                                                              *
*                             v1.7.1 - 2017/11/18                              *
*                                                                              *
********************************************************************************
PureBASIC v5.00-5.61
--------------------------------------------------------------------------------
The goal of this language definition is to emulate the way PureBASIC's native
IDE highlights its code, including inline Assembly syntax coloring. When used
with the "edit-purebasic" theme, PureBASIC code will be highlighted just like
in its native IDE.

Keywords from all PureBASIC versions (from 5.00 up to current) are added to the
list (deprecated keywords are preserved) to ensure that any code written for
PureBASIC >=5.00 will be parsed and highlighted correctly.

Comments in color definitions refer to PureBASIC native IDE's default palette.
--------------------------------------------------------------------------------
This language definition is maintained at the PureBASIC Archives project:

    https://github.com/tajmone/purebasic-archives/tree/master/syntax-highlighting/highlight

(visit the above link for more info and resources on this lang definition)
--------------------------------------------------------------------------------
Written by Tristano Ajmone:

    https://github.com/tajmone

Released into the public domain according to the Unlicense terms:

    http://unlicense.org/
--------------------------------------------------------------------------------
]]

Description="PureBASIC"

-- PureBASIC keywords are case-insensitive ("procedure", "Procedure" and
-- "PROCEDURE" are the same keyword), so keyword matching must ignore case.
IgnoreCase=true

Comments={ -- PB IDE color: #00AAAA (Persian Green/Tradewind)
  { Block=false,
    Nested=false,
    Delimiter = { [[ ; ]] }
  }
}

Strings={ -- PB IDE color: #0080FF (Azure Radiance)
  Delimiter=[[ " ]],
  Escape=[=[\\[abfnrtv"\\]]=], -- PB IDE color: same as String
}
--[[ STRINGS NOTE: There's more to PB strings than this delimiter definition.
     Escaped strings (~"") are handled via `Keyword Id=4` and custom code in
     the `OnStateChange()` function found below.
--]]

Operators=[[\&|<|>|\!|\||\=|\/|\*|\%|\+|\-|~]] -- PB IDE color: same as normal text (Black)

-- NUMBERS > PB IDE color: same as normal text (Black)
-- The pattern uses extended mode (?x): every "#" comment inside it runs to the
-- end of its line, so each alternative MUST stay on a line of its own.
-- NOTE(review): the DECIMAL alternative below and the opening of the Keywords
-- table (up to the first "List={") were garbled in the reviewed copy of this
-- file and have been reconstructed; verify both against the upstream file.
Digits=[[
(?x)
# ============ HEX ============
# Pascal style ($FF):
  \$[0-9a-fA-F]+\b
# ============ BINARY ============
| %[01]+\b
# ============ FLOAT ============
# With decimal point separator:
| \b[-]?\d+\.\d+(?:[eE][\-\+]?\d+)?[a-zA-Z]*\b
# Without decimal point separator:
| \b[-]?\d+(?:[eE][\-\+]?\d+)[a-zA-Z]*\b
# ============ DECIMAL ============
| (?<!\w)\d+\b
]]

Keywords={
  { Id=1, -- Keywords > PB IDE color: #006666 (Blue Stone) + Bold
    List={
      -- Keywords list built by parsing the tokens inside PureBASIC SDK's
      -- "SyntaxHilighting.dll" (from each PureBASIC version)...
      "Align", "And", "Array", "As", "Break", "CallDebugger", "Case",
      "CompilerCase", "CompilerDefault", "CompilerElse", "CompilerElseIf",
      "CompilerEndIf", "CompilerEndSelect", "CompilerError", "CompilerIf",
      "CompilerSelect", "CompilerWarning", "Continue", "Data", "DataSection",
      "Debug", "DebugLevel", "Declare", "DeclareC", "DeclareCDLL",
      "DeclareDLL", "DeclareModule", "Default", "Define", "Dim", "DisableASM",
      "DisableDebugger", "DisableExplicit", "Else", "ElseIf", "EnableASM",
      "EnableDebugger", "EnableExplicit", "End", "EndDataSection",
      "EndDeclareModule", "EndEnumeration", "EndIf", "EndImport",
      "EndInterface", "EndMacro", "EndModule", "EndProcedure", "EndSelect",
      "EndStructure", "EndStructureUnion", "EndWith", "Enumeration",
      "EnumerationBinary", "Extends", "FakeReturn", "For", "ForEach",
      "ForEver", "Global", "Gosub", "Goto", "If", "Import", "ImportC",
      "IncludeBinary", "IncludeFile", "IncludePath", "Interface", "List",
      "Macro", "MacroExpandedCount", "Map", "Module", "NewList", "NewMap",
      "Next", "Not", "Or", "Procedure", "ProcedureC", "ProcedureCDLL",
      "ProcedureDLL", "ProcedureReturn", "Protected", "Prototype",
      "PrototypeC", "ReDim", "Read", "Repeat", "Restore", "Return", "Runtime",
      "Select", "Shared", "Static", "Step", "Structure", "StructureUnion",
      "Swap", "Threaded", "To", "UndefineMacro", "Until", "UnuseModule",
      "UseModule", "Wend", "While", "With", "XIncludeFile", "XOr",
    },
  },
  { Id=2, -- Constants > PB IDE color: #924B72 (Cannon Pink)
    Regex=[[ (#[a-zA-Z_]\w*\$?) ]]
  },
  { Id=2, -- Inline ASM > PB IDE color: #924B72 (Cannon Pink)
    Regex=[[ ^\s*(![^;]*) ]],
    Group=1
  },
  { Id=2, -- ASM Keywords > PB IDE color: #924B72 (Cannon Pink)
    List={
      -- Keywords list built by parsing the tokens inside PureBASIC SDK's
      -- "SyntaxHilighting.dll" (from each PureBASIC version)...
      "AAA", "AAD", "AAM", "AAS", "ADC", "ADD", "AND", "ARPL", "BOUND", "BSF",
      "BSR", "BSWAP", "BT", "BTC", "BTR", "BTS", "CALL", "CBW", "CDQ", "CLC",
      "CLD", "CLI", "CLTS", "CMC", "CMOVA", "CMOVAE", "CMOVB", "CMOVBE",
      "CMOVC", "CMOVE", "CMOVG", "CMOVGE", "CMOVL", "CMOVLE", "CMOVNA",
      "CMOVNAE", "CMOVNB", "CMOVNBE", "CMOVNC", "CMOVNE", "CMOVNG", "CMOVNGE",
      "CMOVNL", "CMOVNLE", "CMOVNO", "CMOVNP", "CMOVNS", "CMOVNZ", "CMOVO",
      "CMOVP", "CMOVPE", "CMOVPO", "CMOVS", "CMOVZ", "CMP", "CMPS", "CMPSB",
      "CMPSD", "CMPSW", "CMPXCHG", "CMPXCHG8B", "CWD", "CWDE", "DAA", "DAS",
      "DB", "DD", "DEC", "DIV", "DW", "EMMS", "ENTER", "ESC", "F2XM1", "FABS",
      "FADD", "FADDP", "FBLD", "FBSTP", "FCHS", "FCLEX", "FCMOVB", "FCMOVBE",
      "FCMOVE", "FCMOVNB", "FCMOVNBE", "FCMOVNE", "FCMOVNU", "FCMOVU", "FCOM",
      "FCOMI", "FCOMIP", "FCOMP", "FCOMPP", "FCOS", "FDECSTP", "FDIV", "FDIVP",
      "FDIVR", "FDIVRP", "FFREE", "FIADD", "FICOM", "FICOMP", "FIDIV",
      "FIDIVR", "FILD", "FIMUL", "FINCSTP", "FINIT", "FIST", "FISTP", "FISUB",
      "FISUBR", "FLD", "FLD1", "FLDCW", "FLDENV", "FLDL2E", "FLDL2T", "FLDLG2",
      "FLDLN2", "FLDPI", "FLDZ", "FMUL", "FMULP", "FNCLEX", "FNINIT", "FNOP",
      "FNSAVE", "FNSTCW", "FNSTENV", "FNSTSW", "FPATAN", "FPREM", "FPREM1",
      "FPTAN", "FRNDINT", "FRSTOR", "FSAVE", "FSCALE", "FSETPM", "FSIN",
      "FSINCOS", "FSQRT", "FST", "FSTCW", "FSTENV", "FSTP", "FSTSW", "FSUB",
      "FSUBP", "FSUBR", "FSUBRP", "FTST", "FUCOM", "FUCOMI", "FUCOMIP",
      "FUCOMP", "FUCOMPP", "FWAIT", "FXAM", "FXCH", "FXTRACT", "FYL2X",
      "FYL2XP1", "HLT", "IDIV", "IMUL", "IN", "INC", "INS", "INSB", "INSD",
      "INSW", "INT", "INTO", "INVD", "INVLPG", "IRET", "IRETD", "JA", "JAE",
      "JB", "JBE", "JC", "JCXZ", "JE", "JECXZ", "JG", "JGE", "JL", "JLE",
      "JMP", "JNA", "JNAE", "JNB", "JNBE", "JNC", "JNE", "JNG", "JNGE", "JNL",
      "JNLE", "JNO", "JNP", "JNS", "JNZ", "JO", "JP", "JPE", "JPO", "JS", "JZ",
      "LAHF", "LAR", "LDS", "LEA", "LEAVE", "LES", "LFS", "LGDT", "LGS",
      "LIDT", "LLDT", "LMSW", "LOCK", "LODS", "LODSB", "LODSD", "LODSW",
      "LOOP", "LOOPE", "LOOPNE", "LOOPNZ", "LOOPZ", "LSL", "LSS", "LTR", "MOV",
      "MOVD", "MOVQ", "MOVS", "MOVSB", "MOVSD", "MOVSW", "MOVSX", "MOVZX",
      "MUL", "NEG", "NOP", "NOT", "OR", "OUT", "OUTS", "OUTSB", "OUTSD",
      "OUTSW", "PACKSSDW", "PACKSSWB", "PACKUSWB", "PADDB", "PADDD", "PADDSB",
      "PADDSW", "PADDUSB", "PADDUSW", "PADDW", "PAND", "PANDN", "PCMPEQB",
      "PCMPEQD", "PCMPEQW", "PCMPGTB", "PCMPGTD", "PCMPGTW", "PMADDWD",
      "PMULHW", "POP", "POPA", "POPAD", "POPF", "POPFD", "POR", "PSLLD",
      "PSLLQ", "PSLLW", "PSRAD", "PSRAW", "PSRLD", "PSRLQ", "PSRLW", "PSUBB",
      "PSUBD", "PSUBSB", "PSUBSW", "PSUBUSB", "PSUBUSW", "PSUBW", "PUNPCKHBW",
      "PUNPCKHDQ", "PUNPCKHWD", "PUNPCKLBW", "PUNPCKLDQ", "PUNPCKLWD", "PUSH",
      "PUSHA", "PUSHAD", "PUSHF", "PUSHFD", "PXOR", "RCL", "RCR", "RDMSR",
      "RDPMC", "RDTSC", "REP", "REPE", "REPNE", "REPNZ", "REPZ", "RET", "RETF",
      "ROL", "ROR", "RSM", "SAHF", "SAL", "SAR", "SBB", "SCAS", "SCASB",
      "SCASD", "SCASW", "SETA", "SETAE", "SETB", "SETBE", "SETC", "SETE",
      "SETG", "SETGE", "SETL", "SETLE", "SETNA", "SETNAE", "SETNB", "SETNBE",
      "SETNC", "SETNE", "SETNG", "SETNGE", "SETNL", "SETNLE", "SETNO", "SETNP",
      "SETNS", "SETNZ", "SETO", "SETP", "SETPE", "SETPO", "SETS", "SETZ",
      "SGDT", "SHL", "SHLD", "SHR", "SHRD", "SIDT", "SLDT", "SMSW", "STC",
      "STD", "STI", "STOS", "STOSB", "STOSD", "STOSW", "STR", "SUB", "TEST",
      "UD2", "VERR", "VERW", "WAIT", "WBINVD", "WRMSR", "XADD", "XCHG", "XLAT",
      "XLATB", "XOR"
    },
  },
  { Id=3, -- Procedure calls > PB IDE color: #006666 (Blue Stone)
    Regex=[[ ([a-zA-Z_]\w*)(?:(?:\s*)\() ]],
    Group=1
  },
  { Id=4, -- Escaped-String Prefix ("~") > PB IDE color: same as strings
    Regex=[[ ~" ]],
    -- NOTE: In the final doc, this Keyword is converted to become
    --       part of the string [see OnStateChange() func below]
  },
}

--[[----------------------------------------------------------------------------
OnStateChange(oldState, newState, token, kwgroup)

State-change hook: decides whether a transition proposed by the parser is
accepted, rejected or replaced, so that PureBASIC literal strings ("...") and
escaped strings (~"...") are told apart.

Parameters:
  oldState  state the parser is leaving
  newState  state the parser proposes to enter
  token     text that triggered the transition
  kwgroup   keyword group Id of the token (only compared against 4 here)

Returns one of HL_REJECT, HL_ESC_SEQ, HL_STRING, HL_STRING_END or `newState`.
Returns nothing when the old state is HL_STRING or HL_ESC_SEQ and the new state
is neither an escape sequence, a group-4 keyword, nor a string.

The HL_* names are not defined in this file -- presumably they are supplied by
the host application before the hook is called.

The hook keeps its memory in three implicit globals (nil until first assigned;
they are only ever compared with `==true` / `~=true`):
  escapedString   set when a ~" prefix has opened an escapable string
  escapeSeq       set when the last escape sequence was accepted
  forceStringEnd  set when a rejected \" or ~" inside a string will cause a
                  stray " to be offered as a new string start
------------------------------------------------------------------------------]]
function OnStateChange(oldState, newState, token, kwgroup)

  --[[ Dismiss Escape-Sequences
       =========================
       Currently, I couldn't find a way to preserve escape sequences without
       causing stray behaviour in string. So, for the time being they are
       just dismissed. ]]
  if newState==HL_ESC_SEQ then

    if oldState==HL_STRING then
      -- ESCAPE SEQUENCE FOUND INSIDE A STRING:
      if escapedString~=true then
        -- String is Literal (no escaping allowed)...
        escapeSeq = false
        if token=='\\"' then
          -- rejecting a \" will cause the \ to become part of the curr. string
          -- but the " will be thrown again to the parser, which will mistake it
          -- for a new string start. We'll use the `forceStringEnd` var to
          -- prevent this...
          forceStringEnd = true
          return HL_REJECT
        else
          -- all other escape sequences can be suppressed by assimilating
          -- them to the current string...
          forceStringEnd=false
          return HL_REJECT --HL_STRING
        end
      else
        -- String is Escapable...
        escapeSeq = true
        forceStringEnd=false
        return HL_ESC_SEQ
      end

    -- HANDLE TWO ESCAPE SEQUENCES IN A ROW:
    elseif oldState==HL_ESC_SEQ then
      escapeSeq = true
      escapedString = true
      forceStringEnd=false
      return HL_ESC_SEQ

    else
      -- ESCAPE SEQUENCE FOUND OUTSIDE A STRING
      escapeSeq = false
      forceStringEnd=false
      escapedString = false
      return HL_REJECT
    end

  --[[ PB Escape Strings (~"...")
       ==========================
       Keyword 4 (~") is converted to string state, so the tilde becomes part
       of the actual string. The boolean var `escapedString` tracks this
       process. ]]
  elseif newState==HL_KEYWORD and kwgroup==4 then

    -- If ~" occurs inside a string, it's just a string with a tilde as last char...
    if oldState==HL_STRING then
      -- We use the `forceStringEnd` var trick here, like we did with
      -- rejected \" escape sequences above...
      forceStringEnd=true
      return HL_REJECT -- HL_STRING -- HL_REJECT
    -- In all other cases it's an escaped string delimiter (opening)...
    else
      escapedString = true
      escapeSeq = false
      forceStringEnd=false
      return HL_STRING
    end

  --[[ NEW STATE IS STRING: ]]
  elseif newState==HL_STRING then

    --[[ Handle The " After A Rejected \" or ~" in Literal String
         =======================================================
         A rejected \" or ~" led to a spurious " being fed to the parser which
         can be mistaken for a new string delimiter ]]
    if forceStringEnd==true then
      forceStringEnd=false
      return HL_STRING_END

    --[[ Sanitize String Starts
         ======================
         Because Keyword 4 is converted to a string start, we must tell the
         parser to treat the next string delimiter as a string end! ]]
    elseif escapedString==true then
      escapedString = false
      return HL_STRING_END

    --[[ Sanitize String After Escape Sequence
         =====================================
         Ensure that a " immediately following an escape sequence is treated
         as a string-end delimiter. ]]
    elseif token=='"' and escapeSeq==true then
      escapeSeq = false
      forceStringEnd=false
      escapedString = false
      return HL_STRING_END

    else
      escapeSeq = false
      forceStringEnd=false
      return HL_STRING
    end

  --[[ FOR ALL OTHER SYNTAX ITEMS: ]]
  elseif oldState~=HL_STRING and oldState~=HL_ESC_SEQ then
    -- Reset all Trackers' States: This is required to avoid some edge-cases
    -- of strings corruption in complex source code...
    escapeSeq = false
    escapedString = false
    forceStringEnd=false
    return newState
  end
end

--[[==========================================================================
                                  CHANGELOG
==============================================================================
v1.7.1 - 2017/11/18 (PureBASIC v5.61)
  - Syntax checked against new PureBASIC v5.61 (no changes detected)
v1.7 - 2017/10/02 (PureBASIC v5.60)
  - IMPROVEMENTS: Escape sequences are now correctly parsed and highlighted.
v1.6 - 2017/09/30 (PureBASIC v5.60)
  - IMPROVEMENTS: Added numbers definition (hex, binary, floats and decimals)
  - BUG-FIX: String concatenations didn't always parse correctly; now this was
    fixed (at the expense of escaped sequences).
  - ABROGATED: parsing of escape sequences is now disabled because it caused
    too many problems with strings.
v1.5 - 2017/09/28 (PureBASIC v5.60)
  - IMPROVEMENTS:
    - Escaped-String Prefix (~) is no longer handled as a keyword (ID=4/kwd)
      but it's recognized as a valid string delimiter.
    - Escape Sequences are further sanitized so that they can occur only
      inside strings with the ~" opening delimiter.
  - ABROGATED:
    - The lang definition no longer uses Keyword 4 (Escaped-String Prefix).
v1.4 - 2017/09/27 (PureBASIC v5.60)
  - BUG-FIX: Added sanitize function to avoid false positive Escape Sequences
    in structured vars (eg: "\v" in "SomeStructure\var1").
    (Thanks to André Simon -- see Issue #23)
    -- https://github.com/andre-simon/highlight/issues/23
v1.3 - 2017/05/20 (PureBASIC v5.60)
  - fixed single line comment delimiter
v1.2 - 2017/05/11 (PureBASIC v5.60)
  - Added ASM keywords and support for inline ASM (via "!" syntax).
  - BUG-FIXES:
    - Repaired missing or misspelled PB Keywords (something went wrong in the
      keywords list of the v1.1 of this lang definition, some tokens were
      lost, other fused into a single token -- sorry about that).
v1.1 - 2017/04/30 (PureBASIC v5.60)
  - Keywords list now built by extracting them from the PureBASIC SDK's
    "SyntaxHilighting.dll" (from each PureBASIC version). Tokens from each
    version are added to the list, and renamed or removed tokens are kept for
    the sake of covering all versions of the language from PureBASIC v5.00
    upward. (NOTE: currently, there are no renamed or deprecated tokens in the
    keywords list). For more info, see:
    -- http://www.purebasic.fr/english/viewtopic.php?&p=506269
    -- https://github.com/tajmone/purebasic-archives/tree/master/syntax-highlighting/guidelines
v1.0 - 2016/10/27 (PureBASIC v5.50)
  - First release. Keywords list taken and adapted from GuShH's PureBasic
    language file for GeSHi:
    -- https://github.com/easybook/geshi/blob/master/geshi/purebasic.php
--]]