// clang-format off /* * Copyright (c) 2009, Vincent Berthoux * * This source code is released for free distribution under the terms of the * GNU General Public License. * * This module contains functions for generating tags for Objective Caml * language files. */ /* * INCLUDE FILES */ #include "third_party/ctags/general.h" /* must always come first */ #include "libc/mem/alg.h" #include "libc/str/str.h" #include "third_party/ctags/keyword.h" #include "third_party/ctags/entry.h" #include "third_party/ctags/options.h" #include "third_party/ctags/read.h" #include "third_party/ctags/routines.h" #include "third_party/ctags/vstring.h" /* To get rid of unused parameter warning in * -Wextra */ #ifdef UNUSED #elif defined(__GNUC__) # define UNUSED(x) UNUSED_ ## x __attribute__((unused)) #elif defined(__LCLINT__) # define UNUSED(x) /*@unused@*/ x #else # define UNUSED(x) x #endif #define OCAML_MAX_STACK_SIZE 256 typedef enum { K_CLASS, /* Ocaml class, relatively rare */ K_METHOD, /* class method */ K_MODULE, /* Ocaml module OR functor */ K_VAR, K_TYPE, /* name of an OCaml type */ K_FUNCTION, K_CONSTRUCTOR, /* Constructor of a sum type */ K_RECORDFIELD, K_EXCEPTION } ocamlKind; static kindOption OcamlKinds[] = { {TRUE, 'c', "class", "classes"}, {TRUE, 'm', "method", "Object's method"}, {TRUE, 'M', "module", "Module or functor"}, {TRUE, 'v', "var", "Global variable"}, {TRUE, 't', "type", "Type name"}, {TRUE, 'f', "function", "A function"}, {TRUE, 'C', "Constructor", "A constructor"}, {TRUE, 'r', "Record field", "A 'structure' field"}, {TRUE, 'e', "Exception", "An exception"} }; typedef enum { OcaKEYWORD_and, OcaKEYWORD_begin, OcaKEYWORD_class, OcaKEYWORD_do, OcaKEYWORD_done, OcaKEYWORD_else, OcaKEYWORD_end, OcaKEYWORD_exception, OcaKEYWORD_for, OcaKEYWORD_functor, OcaKEYWORD_fun, OcaKEYWORD_if, OcaKEYWORD_in, OcaKEYWORD_let, OcaKEYWORD_value, OcaKEYWORD_match, OcaKEYWORD_method, OcaKEYWORD_module, OcaKEYWORD_mutable, OcaKEYWORD_object, OcaKEYWORD_of, OcaKEYWORD_rec, OcaKEYWORD_sig, OcaKEYWORD_struct, OcaKEYWORD_then, OcaKEYWORD_try, OcaKEYWORD_type, OcaKEYWORD_val, OcaKEYWORD_virtual, OcaKEYWORD_while, OcaKEYWORD_with, OcaIDENTIFIER, Tok_PARL, /* '(' */ Tok_PARR, /* ')' */ Tok_BRL, /* '[' */ Tok_BRR, /* ']' */ Tok_CurlL, /* '{' */ Tok_CurlR, /* '}' */ Tok_Prime, /* '\'' */ Tok_Pipe, /* '|' */ Tok_EQ, /* '=' */ Tok_Val, /* string/number/poo */ Tok_Op, /* any operator recognized by the language */ Tok_semi, /* ';' */ Tok_comma, /* ',' */ Tok_To, /* '->' */ Tok_Sharp, /* '#' */ Tok_Backslash, /* '\\' */ Tok_EOF /* END of file */ } ocamlKeyword; typedef struct sOcaKeywordDesc { const char *name; ocamlKeyword id; } ocaKeywordDesc; typedef ocamlKeyword ocaToken; static const ocaKeywordDesc OcamlKeywordTable[] = { { "and" , OcaKEYWORD_and }, { "begin" , OcaKEYWORD_begin }, { "class" , OcaKEYWORD_class }, { "do" , OcaKEYWORD_do }, { "done" , OcaKEYWORD_done }, { "else" , OcaKEYWORD_else }, { "end" , OcaKEYWORD_end }, { "exception" , OcaKEYWORD_exception }, { "for" , OcaKEYWORD_for }, { "fun" , OcaKEYWORD_fun }, { "function" , OcaKEYWORD_fun }, { "functor" , OcaKEYWORD_functor }, { "in" , OcaKEYWORD_in }, { "let" , OcaKEYWORD_let }, { "match" , OcaKEYWORD_match }, { "method" , OcaKEYWORD_method }, { "module" , OcaKEYWORD_module }, { "mutable" , OcaKEYWORD_mutable }, { "object" , OcaKEYWORD_object }, { "of" , OcaKEYWORD_of }, { "rec" , OcaKEYWORD_rec }, { "sig" , OcaKEYWORD_sig }, { "struct" , OcaKEYWORD_struct }, { "then" , OcaKEYWORD_then }, { "try" , OcaKEYWORD_try }, { "type" , OcaKEYWORD_type }, { "val" , OcaKEYWORD_val }, { "value" , OcaKEYWORD_value }, /* just to handle revised syntax */ { "virtual" , OcaKEYWORD_virtual }, { "while" , OcaKEYWORD_while }, { "with" , OcaKEYWORD_with }, { "or" , Tok_Op }, { "mod " , Tok_Op }, { "land " , Tok_Op }, { "lor " , Tok_Op }, { "lxor " , Tok_Op }, { "lsl " , Tok_Op }, { "lsr " , Tok_Op }, { "asr" , Tok_Op }, { "->" , Tok_To }, { "true" , Tok_Val }, { "false" , Tok_Val } }; static langType Lang_Ocaml; boolean exportLocalInfo = FALSE; /*////////////////////////////////////////////////////////////////// //// lexingInit */ typedef struct _lexingState { vString *name; /* current parsed identifier/operator */ const unsigned char *cp; /* position in stream */ } lexingState; /* array of the size of all possible value for a char */ boolean isOperator[1 << (8 * sizeof (char))] = { FALSE }; static void initKeywordHash ( void ) { const size_t count = sizeof (OcamlKeywordTable) / sizeof (ocaKeywordDesc); size_t i; for (i = 0; i < count; ++i) { addKeyword (OcamlKeywordTable[i].name, Lang_Ocaml, (int) OcamlKeywordTable[i].id); } } /* definition of all the operator in OCaml, * /!\ certain operator get special treatment * in regards of their role in OCaml grammar : * '|' ':' '=' '~' and '?' */ static void initOperatorTable ( void ) { isOperator['!'] = TRUE; isOperator['$'] = TRUE; isOperator['%'] = TRUE; isOperator['&'] = TRUE; isOperator['*'] = TRUE; isOperator['+'] = TRUE; isOperator['-'] = TRUE; isOperator['.'] = TRUE; isOperator['/'] = TRUE; isOperator[':'] = TRUE; isOperator['<'] = TRUE; isOperator['='] = TRUE; isOperator['>'] = TRUE; isOperator['?'] = TRUE; isOperator['@'] = TRUE; isOperator['^'] = TRUE; isOperator['~'] = TRUE; isOperator['|'] = TRUE; } /*////////////////////////////////////////////////////////////////////// //// Lexing */ static boolean isNum (char c) { return c >= '0' && c <= '9'; } static boolean isLowerAlpha (char c) { return c >= 'a' && c <= 'z'; } static boolean isUpperAlpha (char c) { return c >= 'A' && c <= 'Z'; } static boolean isAlpha (char c) { return isLowerAlpha (c) || isUpperAlpha (c); } static boolean isIdent (char c) { return isNum (c) || isAlpha (c) || c == '_' || c == '\''; } static boolean isSpace (char c) { return c == ' ' || c == '\t' || c == '\r' || c == '\n'; } static void eatWhiteSpace (lexingState * st) { const unsigned char *cp = st->cp; while (isSpace (*cp)) cp++; st->cp = cp; } static void eatString (lexingState * st) { boolean lastIsBackSlash = FALSE; boolean unfinished = TRUE; const unsigned char *c = st->cp + 1; while (unfinished) { /* end of line should never happen. * we tolerate it */ if (c == NULL || c[0] == '\0') break; else if (*c == '"' && !lastIsBackSlash) unfinished = FALSE; else lastIsBackSlash = *c == '\\'; c++; } st->cp = c; } static void eatComment (lexingState * st) { boolean unfinished = TRUE; boolean lastIsStar = FALSE; const unsigned char *c = st->cp + 2; while (unfinished) { /* we've reached the end of the line.. * so we have to reload a line... */ if (c == NULL || *c == '\0') { st->cp = fileReadLine (); /* WOOPS... no more input... * we return, next lexing read * will be null and ok */ if (st->cp == NULL) return; c = st->cp; } /* we've reached the end of the comment */ else if (*c == ')' && lastIsStar) unfinished = FALSE; /* here we deal with imbricated comment, which * are allowed in OCaml */ else if (c[0] == '(' && c[1] == '*') { st->cp = c; eatComment (st); c = st->cp; if (c == NULL) return; lastIsStar = FALSE; c++; } /* OCaml has a rule which says : * * "Comments do not occur inside string or character literals. * Nested comments are handled correctly." * * So if we encounter a string beginning, we must parse it to * get a good comment nesting (bug ID: 3117537) */ else if (*c == '"') { st->cp = c; eatString (st); c = st->cp; } else { lastIsStar = '*' == *c; c++; } } st->cp = c; } static void readIdentifier (lexingState * st) { const unsigned char *p; vStringClear (st->name); /* first char is a simple letter */ if (isAlpha (*st->cp) || *st->cp == '_') vStringPut (st->name, (int) *st->cp); /* Go till you get identifier chars */ for (p = st->cp + 1; isIdent (*p); p++) vStringPut (st->name, (int) *p); st->cp = p; vStringTerminate (st->name); } static ocamlKeyword eatNumber (lexingState * st) { while (isNum (*st->cp)) st->cp++; return Tok_Val; } /* Operator can be defined in OCaml as a function * so we must be ample enough to parse them normally */ static ocamlKeyword eatOperator (lexingState * st) { int count = 0; const unsigned char *root = st->cp; vStringClear (st->name); while (isOperator[st->cp[count]]) { vStringPut (st->name, st->cp[count]); count++; } vStringTerminate (st->name); st->cp += count; if (count <= 1) { switch (root[0]) { case '|': return Tok_Pipe; case '=': return Tok_EQ; default: return Tok_Op; } } else if (count == 2 && root[0] == '-' && root[1] == '>') return Tok_To; else return Tok_Op; } /* The lexer is in charge of reading the file. * Some of sub-lexer (like eatComment) also read file. * lexing is finished when the lexer return Tok_EOF */ static ocamlKeyword lex (lexingState * st) { int retType; /* handling data input here */ while (st->cp == NULL || st->cp[0] == '\0') { st->cp = fileReadLine (); if (st->cp == NULL) return Tok_EOF; } if (isAlpha (*st->cp)) { readIdentifier (st); retType = lookupKeyword (vStringValue (st->name), Lang_Ocaml); if (retType == -1) /* If it's not a keyword */ { return OcaIDENTIFIER; } else { return retType; } } else if (isNum (*st->cp)) return eatNumber (st); else if (isSpace (*st->cp)) { eatWhiteSpace (st); return lex (st); } /* OCaml permit the definition of our own operators * so here we check all the consecuting chars which * are operators to discard them. */ else if (isOperator[*st->cp]) return eatOperator (st); else switch (*st->cp) { case '(': if (st->cp[1] == '*') /* ergl, a comment */ { eatComment (st); return lex (st); } else { st->cp++; return Tok_PARL; } case ')': st->cp++; return Tok_PARR; case '[': st->cp++; return Tok_BRL; case ']': st->cp++; return Tok_BRR; case '{': st->cp++; return Tok_CurlL; case '}': st->cp++; return Tok_CurlR; case '\'': st->cp++; return Tok_Prime; case ',': st->cp++; return Tok_comma; case '=': st->cp++; return Tok_EQ; case ';': st->cp++; return Tok_semi; case '"': eatString (st); return Tok_Val; case '_': st->cp++; return Tok_Val; case '#': st->cp++; return Tok_Sharp; case '\\': st->cp++; return Tok_Backslash; default: st->cp++; break; } /* default return if nothing is recognized, * shouldn't happen, but at least, it will * be handled without destroying the parsing. */ return Tok_Val; } /*////////////////////////////////////////////////////////////////////// //// Parsing */ typedef void (*parseNext) (vString * const ident, ocaToken what); /********** Helpers */ /* This variable hold the 'parser' which is going to * handle the next token */ static parseNext toDoNext; /* Special variable used by parser eater to * determine which action to put after their * job is finished. */ static parseNext comeAfter; /* If a token put an end to current delcaration/ * statement */ static ocaToken terminatingToken; /* Token to be searched by the different * parser eater. */ static ocaToken waitedToken; /* name of the last class, used for * context stacking. */ static vString *lastClass; static vString *voidName; typedef enum _sContextKind { ContextStrong, ContextSoft } contextKind; typedef enum _sContextType { ContextType, ContextModule, ContextClass, ContextValue, ContextFunction, ContextMethod, ContextBlock } contextType; typedef struct _sOcamlContext { contextKind kind; /* well if the context is strong or not */ contextType type; parseNext callback; /* what to do when a context is pop'd */ vString *contextName; /* name, if any, of the surrounding context */ } ocamlContext; /* context stack, can be used to output scope information * into the tag file. */ static ocamlContext stack[OCAML_MAX_STACK_SIZE]; /* current position in the tag */ static int stackIndex; /* special function, often recalled, so putting it here */ static void globalScope (vString * const ident, ocaToken what); /* Return : index of the last named context if one * is found, -1 otherwise */ static int getLastNamedIndex ( void ) { int i; for (i = stackIndex - 1; i >= 0; --i) { if (vStringLength (stack[i].contextName) > 0) { return i; } } return -1; } static const char *contextDescription (contextType t) { switch (t) { case ContextFunction: return "function"; case ContextMethod: return "method"; case ContextValue: return "value"; case ContextModule: return "Module"; case ContextType: return "type"; case ContextClass: return "class"; case ContextBlock: return "begin/end"; } return NULL; } static char contextTypeSuffix (contextType t) { switch (t) { case ContextFunction: case ContextMethod: case ContextValue: case ContextModule: return '/'; case ContextType: return '.'; case ContextClass: return '#'; case ContextBlock: return ' '; } return '$'; } /* Push a new context, handle null string */ static void pushContext (contextKind kind, contextType type, parseNext after, vString const *contextName) { int parentIndex; if (stackIndex >= OCAML_MAX_STACK_SIZE) { verbose ("OCaml Maximum depth reached"); return; } stack[stackIndex].kind = kind; stack[stackIndex].type = type; stack[stackIndex].callback = after; parentIndex = getLastNamedIndex (); if (contextName == NULL) { vStringClear (stack[stackIndex++].contextName); return; } if (parentIndex >= 0) { vStringCopy (stack[stackIndex].contextName, stack[parentIndex].contextName); vStringPut (stack[stackIndex].contextName, contextTypeSuffix (stack[parentIndex].type)); vStringCat (stack[stackIndex].contextName, contextName); } else vStringCopy (stack[stackIndex].contextName, contextName); stackIndex++; } static void pushStrongContext (vString * name, contextType type) { pushContext (ContextStrong, type, &globalScope, name); } static void pushSoftContext (parseNext continuation, vString * name, contextType type) { pushContext (ContextSoft, type, continuation, name); } static void pushEmptyContext (parseNext continuation) { pushContext (ContextSoft, ContextValue, continuation, NULL); } /* unroll the stack until the last named context. * then discard it. Used to handle the : * let f x y = ... * in ... * where the context is reseted after the in. Context may have * been really nested before that. */ static void popLastNamed ( void ) { int i = getLastNamedIndex (); if (i >= 0) { stackIndex = i; toDoNext = stack[i].callback; vStringClear (stack[i].contextName); } else { /* ok, no named context found... * (should not happen). */ stackIndex = 0; toDoNext = &globalScope; } } /* pop a context without regarding it's content * (beside handling empty stack case) */ static void popSoftContext ( void ) { if (stackIndex <= 0) { toDoNext = &globalScope; } else { stackIndex--; toDoNext = stack[stackIndex].callback; vStringClear (stack[stackIndex].contextName); } } /* Reset everything until the last global space. * a strong context can be : * - module * - class definition * - the initial global space * - a _global_ delcaration (let at global scope or in a module). * Created to exit quickly deeply nested context */ static contextType popStrongContext ( void ) { int i; for (i = stackIndex - 1; i >= 0; --i) { if (stack[i].kind == ContextStrong) { stackIndex = i; toDoNext = stack[i].callback; vStringClear (stack[i].contextName); return stack[i].type; } } /* ok, no strong context found... */ stackIndex = 0; toDoNext = &globalScope; return -1; } /* Ignore everything till waitedToken and jump to comeAfter. * If the "end" keyword is encountered break, doesn't remember * why though. */ static void tillToken (vString * const UNUSED (ident), ocaToken what) { if (what == waitedToken) toDoNext = comeAfter; else if (what == OcaKEYWORD_end) { popStrongContext (); toDoNext = &globalScope; } } /* Ignore everything till a waitedToken is seen, but * take care of balanced parentheses/bracket use */ static void contextualTillToken (vString * const UNUSED (ident), ocaToken what) { static int parentheses = 0; static int bracket = 0; static int curly = 0; switch (what) { case Tok_PARL: parentheses--; break; case Tok_PARR: parentheses++; break; case Tok_CurlL: curly--; break; case Tok_CurlR: curly++; break; case Tok_BRL: bracket--; break; case Tok_BRR: bracket++; break; default: /* other token are ignored */ break; } if (what == waitedToken && parentheses == 0 && bracket == 0 && curly == 0) toDoNext = comeAfter; else if (what == OcaKEYWORD_end) { popStrongContext (); toDoNext = &globalScope; } } /* Wait for waitedToken and jump to comeAfter or let * the globalScope handle declarations */ static void tillTokenOrFallback (vString * const ident, ocaToken what) { if (what == waitedToken) toDoNext = comeAfter; else globalScope (ident, what); } /* ignore token till waitedToken, or give up if find * terminatingToken. Use globalScope to handle new * declarations. */ static void tillTokenOrTerminatingOrFallback (vString * const ident, ocaToken what) { if (what == waitedToken) toDoNext = comeAfter; else if (what == terminatingToken) toDoNext = globalScope; else globalScope (ident, what); } /* ignore the next token in the stream and jump to the * given comeAfter state */ static void ignoreToken (vString * const UNUSED (ident), ocaToken UNUSED (what)) { toDoNext = comeAfter; } /********** Grammar */ /* the purpose of each function is detailled near their * implementation */ static void killCurrentState ( void ) { /* Tracking the kind of previous strong * context, if it doesn't match with a * really strong entity, repop */ switch (popStrongContext ()) { case ContextValue: popStrongContext (); break; case ContextFunction: popStrongContext (); break; case ContextMethod: popStrongContext (); break; case ContextType: popStrongContext(); break; case ContextBlock: break; case ContextModule: break; case ContextClass: break; default: /* nothing more */ break; } } /* used to prepare tag for OCaml, just in case their is a need to * add additional information to the tag. */ static void prepareTag (tagEntryInfo * tag, vString const *name, ocamlKind kind) { int parentIndex; initTagEntry (tag, vStringValue (name)); tag->kindName = OcamlKinds[kind].name; tag->kind = OcamlKinds[kind].letter; if (kind == K_MODULE) { tag->lineNumberEntry = TRUE; tag->lineNumber = 1; } parentIndex = getLastNamedIndex (); if (parentIndex >= 0) { tag->extensionFields.scope[0] = contextDescription (stack[parentIndex].type); tag->extensionFields.scope[1] = vStringValue (stack[parentIndex].contextName); } } /* Used to centralise tag creation, and be able to add * more information to it in the future */ static void addTag (vString * const ident, int kind) { if (OcamlKinds [kind].enabled && ident != NULL && vStringLength (ident) > 0) { tagEntryInfo toCreate; prepareTag (&toCreate, ident, kind); makeTagEntry (&toCreate); } } boolean needStrongPoping = FALSE; static void requestStrongPoping ( void ) { needStrongPoping = TRUE; } static void cleanupPreviousParser ( void ) { if (needStrongPoping) { needStrongPoping = FALSE; popStrongContext (); } } /* Due to some circular dependencies, the following functions * must be forward-declared. */ static void letParam (vString * const ident, ocaToken what); static void localScope (vString * const ident, ocaToken what); static void mayRedeclare (vString * const ident, ocaToken what); static void typeSpecification (vString * const ident, ocaToken what); /* * Parse a record type * type ident = // parsed previously * { * ident1: type1; * ident2: type2; * } */ static void typeRecord (vString * const ident, ocaToken what) { switch (what) { case OcaIDENTIFIER: addTag (ident, K_RECORDFIELD); terminatingToken = Tok_CurlR; waitedToken = Tok_semi; comeAfter = &typeRecord; toDoNext = &tillTokenOrTerminatingOrFallback; break; case OcaKEYWORD_mutable: /* ignore it */ break; case Tok_CurlR: popStrongContext (); toDoNext = &globalScope; break; default: /* don't care */ break; } } /* handle : * exception ExceptionName of ... */ static void exceptionDecl (vString * const ident, ocaToken what) { if (what == OcaIDENTIFIER) { addTag (ident, K_EXCEPTION); } else /* probably ill-formed, give back to global scope */ { globalScope (ident, what); } toDoNext = &globalScope; } tagEntryInfo tempTag; vString *tempIdent; /* Ensure a constructor is not a type path beginning * with a module */ static void constructorValidation (vString * const ident, ocaToken what) { switch (what) { case Tok_Op: /* if we got a '.' which is an operator */ toDoNext = &globalScope; popStrongContext (); needStrongPoping = FALSE; break; case OcaKEYWORD_of: /* OK, it must be a constructor :) */ makeTagEntry (&tempTag); vStringClear (tempIdent); toDoNext = &tillTokenOrFallback; comeAfter = &typeSpecification; waitedToken = Tok_Pipe; break; case Tok_Pipe: /* OK, it was a constructor :) */ makeTagEntry (&tempTag); vStringClear (tempIdent); toDoNext = &typeSpecification; break; default: /* and mean that we're not facing a module name */ makeTagEntry (&tempTag); vStringClear (tempIdent); toDoNext = &tillTokenOrFallback; comeAfter = &typeSpecification; waitedToken = Tok_Pipe; /* nothing in the context, discard it */ popStrongContext (); /* to be sure we use this token */ globalScope (ident, what); } } /* Parse beginning of type definition * type 'avar ident = * or * type ('var1, 'var2) ident = */ static void typeDecl (vString * const ident, ocaToken what) { switch (what) { /* parameterized */ case Tok_Prime: comeAfter = &typeDecl; toDoNext = &ignoreToken; break; /* LOTS of parameters */ case Tok_PARL: comeAfter = &typeDecl; waitedToken = Tok_PARR; toDoNext = &tillToken; break; case OcaIDENTIFIER: addTag (ident, K_TYPE); pushStrongContext (ident, ContextType); requestStrongPoping (); waitedToken = Tok_EQ; comeAfter = &typeSpecification; toDoNext = &tillTokenOrFallback; break; default: globalScope (ident, what); } } /* Parse type of kind * type bidule = Ctor1 of ... * | Ctor2 * | Ctor3 of ... * or * type bidule = | Ctor1 of ... | Ctor2 * * when type bidule = { ... } is detected, * let typeRecord handle it. */ static void typeSpecification (vString * const ident, ocaToken what) { switch (what) { case OcaIDENTIFIER: if (isUpperAlpha (ident->buffer[0])) { /* here we handle type aliases of type * type foo = AnotherModule.bar * AnotherModule can mistakenly be took * for a constructor. */ vStringCopy (tempIdent, ident); prepareTag (&tempTag, tempIdent, K_CONSTRUCTOR); toDoNext = &constructorValidation; } else { toDoNext = &tillTokenOrFallback; comeAfter = &typeSpecification; waitedToken = Tok_Pipe; } break; case OcaKEYWORD_and: toDoNext = &typeDecl; break; case Tok_BRL: /* the '[' & ']' are ignored to accommodate */ case Tok_BRR: /* with the revised syntax */ case Tok_Pipe: /* just ignore it */ break; case Tok_CurlL: toDoNext = &typeRecord; break; default: /* don't care */ break; } } static boolean dirtySpecialParam = FALSE; /* parse the ~label and ~label:type parameter */ static void parseLabel (vString * const ident, ocaToken what) { static int parCount = 0; switch (what) { case OcaIDENTIFIER: if (!dirtySpecialParam) { if (exportLocalInfo) addTag (ident, K_VAR); dirtySpecialParam = TRUE; } break; case Tok_PARL: parCount++; break; case Tok_PARR: parCount--; if (parCount == 0) toDoNext = &letParam; break; case Tok_Op: if (ident->buffer[0] == ':') { toDoNext = &ignoreToken; comeAfter = &letParam; } else if (parCount == 0 && dirtySpecialParam) { toDoNext = &letParam; letParam (ident, what); } break; default: if (parCount == 0 && dirtySpecialParam) { toDoNext = &letParam; letParam (ident, what); } break; } } /* Optional argument with syntax like this : * ?(foo = value) */ static void parseOptionnal (vString * const ident, ocaToken what) { static int parCount = 0; switch (what) { case OcaIDENTIFIER: if (!dirtySpecialParam) { if (exportLocalInfo) addTag (ident, K_VAR); dirtySpecialParam = TRUE; if (parCount == 0) toDoNext = &letParam; } break; case Tok_PARL: parCount++; break; case Tok_PARR: parCount--; if (parCount == 0) toDoNext = &letParam; break; default: /* don't care */ break; } } /** handle let inside functions (so like it's name * say : local let */ static void localLet (vString * const ident, ocaToken what) { switch (what) { case Tok_PARL: /* We ignore this token to be able to parse such * declarations : * let (ident : type) = ... */ break; case OcaKEYWORD_rec: /* just ignore to be able to parse such declarations: * let rec ident = ... */ break; case Tok_Op: /* we are defining a new operator, it's a * function definition */ if (exportLocalInfo) addTag (ident, K_FUNCTION); pushSoftContext (mayRedeclare, ident, ContextFunction); toDoNext = &letParam; break; /* Can be a weiiird binding, or an '_' */ case Tok_Val: if (exportLocalInfo) addTag (ident, K_VAR); pushSoftContext (mayRedeclare, ident, ContextValue); toDoNext = &letParam; break; case OcaIDENTIFIER: if (exportLocalInfo) addTag (ident, K_VAR); pushSoftContext (mayRedeclare, ident, ContextValue); toDoNext = &letParam; break; case OcaKEYWORD_end: popStrongContext (); break; default: toDoNext = &localScope; break; } } /* parse : * | pattern pattern -> ... * or * pattern apttern apttern -> ... * we ignore all identifiers declared in the pattern, * because their scope is likely to be even more limited * than the let definitions. * Used after a match ... with, or a function ... or fun ... * because their syntax is similar. */ static void matchPattern (vString * const ident, ocaToken what) { /* keep track of [], as it * can be used in patterns and can * mean the end of match expression in * revised syntax */ static int braceCount = 0; switch (what) { case Tok_To: pushEmptyContext (&matchPattern); toDoNext = &mayRedeclare; break; case Tok_BRL: braceCount++; break; case OcaKEYWORD_value: popLastNamed (); globalScope (ident, what); break; case OcaKEYWORD_in: popLastNamed (); break; default: break; } } /* Used at the beginning of a new scope (begin of a * definition, parenthesis...) to catch inner let * definition that may be in. */ static void mayRedeclare (vString * const ident, ocaToken what) { switch (what) { case OcaKEYWORD_value: // let globalScope handle it globalScope (ident, what); break; case OcaKEYWORD_let: case OcaKEYWORD_val: toDoNext = localLet; break; case OcaKEYWORD_object: vStringClear (lastClass); pushContext (ContextStrong, ContextClass, &localScope, NULL /*voidName */ ); needStrongPoping = FALSE; toDoNext = &globalScope; break; case OcaKEYWORD_for: case OcaKEYWORD_while: toDoNext = &tillToken; waitedToken = OcaKEYWORD_do; comeAfter = &mayRedeclare; break; case OcaKEYWORD_try: toDoNext = &mayRedeclare; pushSoftContext (matchPattern, ident, ContextFunction); break; case OcaKEYWORD_fun: toDoNext = &matchPattern; break; /* Handle the special ;; from the OCaml * Top level */ case Tok_semi: default: toDoNext = &localScope; localScope (ident, what); } } /* parse : * p1 p2 ... pn = ... * or * ?(p1=v) p2 ~p3 ~pn:ja ... = ... */ static void letParam (vString * const ident, ocaToken what) { switch (what) { case Tok_EQ: toDoNext = &mayRedeclare; break; case OcaIDENTIFIER: if (exportLocalInfo) addTag (ident, K_VAR); break; case Tok_Op: switch (ident->buffer[0]) { case ':': /*popSoftContext(); */ /* we got a type signature */ comeAfter = &mayRedeclare; toDoNext = &tillTokenOrFallback; waitedToken = Tok_EQ; break; /* parse something like * ~varname:type * or * ~varname * or * ~(varname: long type) */ case '~': toDoNext = &parseLabel; dirtySpecialParam = FALSE; break; /* Optional argument with syntax like this : * ?(bla = value) * or * ?bla */ case '?': toDoNext = &parseOptionnal; dirtySpecialParam = FALSE; break; default: break; } break; default: /* don't care */ break; } } /* parse object ... * used to be sure the class definition is not a type * alias */ static void classSpecif (vString * const UNUSED (ident), ocaToken what) { switch (what) { case OcaKEYWORD_object: pushStrongContext (lastClass, ContextClass); toDoNext = &globalScope; break; default: vStringClear (lastClass); toDoNext = &globalScope; } } /* Handle a method ... class declaration. * nearly a copy/paste of globalLet. */ static void methodDecl (vString * const ident, ocaToken what) { switch (what) { case Tok_PARL: /* We ignore this token to be able to parse such * declarations : * let (ident : type) = ... */ break; case OcaKEYWORD_mutable: case OcaKEYWORD_virtual: case OcaKEYWORD_rec: /* just ignore to be able to parse such declarations: * let rec ident = ... */ break; case OcaIDENTIFIER: addTag (ident, K_METHOD); /* Normal pushing to get good subs */ pushStrongContext (ident, ContextMethod); /*pushSoftContext( globalScope, ident, ContextMethod ); */ toDoNext = &letParam; break; case OcaKEYWORD_end: popStrongContext (); break; default: toDoNext = &globalScope; break; } } /* name of the last module, used for * context stacking. */ vString *lastModule; /* parse * ... struct (* new global scope *) end * or * ... sig (* new global scope *) end * or * functor ... -> moduleSpecif */ static void moduleSpecif (vString * const ident, ocaToken what) { switch (what) { case OcaKEYWORD_functor: toDoNext = &contextualTillToken; waitedToken = Tok_To; comeAfter = &moduleSpecif; break; case OcaKEYWORD_struct: case OcaKEYWORD_sig: pushStrongContext (lastModule, ContextModule); toDoNext = &globalScope; break; case Tok_PARL: /* ( */ toDoNext = &contextualTillToken; comeAfter = &globalScope; waitedToken = Tok_PARR; contextualTillToken (ident, what); break; default: vStringClear (lastModule); toDoNext = &globalScope; } } /* parse : * module name = ... * then pass the token stream to moduleSpecif */ static void moduleDecl (vString * const ident, ocaToken what) { switch (what) { case OcaKEYWORD_type: /* just ignore it, name come after */ break; case OcaIDENTIFIER: addTag (ident, K_MODULE); vStringCopy (lastModule, ident); waitedToken = Tok_EQ; comeAfter = &moduleSpecif; toDoNext = &contextualTillToken; break; default: /* don't care */ break; } } /* parse : * class name = ... * or * class virtual ['a,'b] classname = ... */ static void classDecl (vString * const ident, ocaToken what) { switch (what) { case OcaIDENTIFIER: addTag (ident, K_CLASS); vStringCopy (lastClass, ident); toDoNext = &contextualTillToken; waitedToken = Tok_EQ; comeAfter = &classSpecif; break; case Tok_BRL: toDoNext = &tillToken; waitedToken = Tok_BRR; comeAfter = &classDecl; break; default: break; } } /* Handle a global * let ident ... * or * let rec ident ... */ static void globalLet (vString * const ident, ocaToken what) { switch (what) { case Tok_PARL: /* We ignore this token to be able to parse such * declarations : * let (ident : type) = ... */ break; case OcaKEYWORD_mutable: case OcaKEYWORD_virtual: case OcaKEYWORD_rec: /* just ignore to be able to parse such declarations: * let rec ident = ... */ break; case Tok_Op: /* we are defining a new operator, it's a * function definition */ addTag (ident, K_FUNCTION); pushStrongContext (ident, ContextFunction); toDoNext = &letParam; break; case OcaIDENTIFIER: addTag (ident, K_VAR); pushStrongContext (ident, ContextValue); requestStrongPoping (); toDoNext = &letParam; break; case OcaKEYWORD_end: popStrongContext (); break; default: toDoNext = &globalScope; break; } } /* Handle the "strong" top levels, all 'big' declarations * happen here */ static void globalScope (vString * const UNUSED (ident), ocaToken what) { /* Do not touch, this is used only by the global scope * to handle an 'and' */ static parseNext previousParser = &globalScope; switch (what) { case OcaKEYWORD_and: cleanupPreviousParser (); toDoNext = previousParser; break; case OcaKEYWORD_type: cleanupPreviousParser (); toDoNext = &typeDecl; previousParser = &typeDecl; break; case OcaKEYWORD_class: cleanupPreviousParser (); toDoNext = &classDecl; previousParser = &classDecl; break; case OcaKEYWORD_module: cleanupPreviousParser (); toDoNext = &moduleDecl; previousParser = &moduleDecl; break; case OcaKEYWORD_end: needStrongPoping = FALSE; killCurrentState (); /*popStrongContext(); */ break; case OcaKEYWORD_method: cleanupPreviousParser (); toDoNext = &methodDecl; /* and is not allowed in methods */ break; /* val is mixed with let as global * to be able to handle mli & new syntax */ case OcaKEYWORD_val: case OcaKEYWORD_value: case OcaKEYWORD_let: cleanupPreviousParser (); toDoNext = &globalLet; previousParser = &globalLet; break; case OcaKEYWORD_exception: cleanupPreviousParser (); toDoNext = &exceptionDecl; previousParser = &globalScope; break; /* must be a #line directive, discard the * whole line. */ case Tok_Sharp: /* ignore */ break; default: /* we don't care */ break; } } /* Parse expression. Well ignore it is more the case, * ignore all tokens except "shocking" keywords */ static void localScope (vString * const ident, ocaToken what) { switch (what) { case Tok_Pipe: case Tok_PARR: case Tok_BRR: case Tok_CurlR: popSoftContext (); break; /* Everything that `begin` has an `end` * as end is overloaded and signal many end * of things, we add an empty strong context to * avoid problem with the end. */ case OcaKEYWORD_begin: pushContext (ContextStrong, ContextBlock, &mayRedeclare, NULL); toDoNext = &mayRedeclare; break; case OcaKEYWORD_in: popLastNamed (); break; /* Ok, we got a '{', which is much likely to create * a record. We cannot treat it like other [ && (, * because it may contain the 'with' keyword and screw * everything else. */ case Tok_CurlL: toDoNext = &contextualTillToken; waitedToken = Tok_CurlR; comeAfter = &localScope; contextualTillToken (ident, what); break; /* Yeah imperative feature of OCaml, * a ';' like in C */ case Tok_semi: toDoNext = &mayRedeclare; break; case Tok_PARL: case Tok_BRL: pushEmptyContext (&localScope); toDoNext = &mayRedeclare; break; case OcaKEYWORD_and: popLastNamed (); toDoNext = &localLet; break; case OcaKEYWORD_else: case OcaKEYWORD_then: popSoftContext (); pushEmptyContext (&localScope); toDoNext = &mayRedeclare; break; case OcaKEYWORD_if: pushEmptyContext (&localScope); toDoNext = &mayRedeclare; break; case OcaKEYWORD_match: pushEmptyContext (&localScope); toDoNext = &mayRedeclare; break; case OcaKEYWORD_with: popSoftContext (); toDoNext = &matchPattern; pushEmptyContext (&matchPattern); break; case OcaKEYWORD_end: killCurrentState (); break; case OcaKEYWORD_fun: comeAfter = &mayRedeclare; toDoNext = &tillToken; waitedToken = Tok_To; break; case OcaKEYWORD_done: case OcaKEYWORD_val: /* doesn't care */ break; default: requestStrongPoping (); globalScope (ident, what); break; } } /*//////////////////////////////////////////////////////////////// //// Deal with the system */ /* in OCaml the file name is the module name used in the language * with it first letter put in upper case */ static void computeModuleName ( void ) { /* in Ocaml the file name define a module. * so we define a module =) */ const char *filename = getSourceFileName (); int beginIndex = 0; int endIndex = strlen (filename) - 1; vString *moduleName = vStringNew (); while (filename[endIndex] != '.' && endIndex > 0) endIndex--; /* avoid problem with path in front of filename */ beginIndex = endIndex; while (beginIndex > 0) { if (filename[beginIndex] == '\\' || filename[beginIndex] == '/') { beginIndex++; break; } beginIndex--; } vStringNCopyS (moduleName, &filename[beginIndex], endIndex - beginIndex); vStringTerminate (moduleName); if (isLowerAlpha (moduleName->buffer[0])) moduleName->buffer[0] += ('A' - 'a'); addTag (moduleName, K_MODULE); vStringDelete (moduleName); } /* Allocate all string of the context stack */ static void initStack ( void ) { int i; for (i = 0; i < OCAML_MAX_STACK_SIZE; ++i) stack[i].contextName = vStringNew (); stackIndex = 0; } static void clearStack ( void ) { int i; for (i = 0; i < OCAML_MAX_STACK_SIZE; ++i) vStringDelete (stack[i].contextName); } static void findOcamlTags (void) { vString *name = vStringNew (); lexingState st; ocaToken tok; initStack (); computeModuleName (); tempIdent = vStringNew (); lastModule = vStringNew (); lastClass = vStringNew (); voidName = vStringNew (); vStringCopyS (voidName, "_"); st.name = vStringNew (); st.cp = fileReadLine (); toDoNext = &globalScope; tok = lex (&st); while (tok != Tok_EOF) { (*toDoNext) (st.name, tok); tok = lex (&st); } vStringDelete (name); vStringDelete (voidName); vStringDelete (tempIdent); vStringDelete (lastModule); vStringDelete (lastClass); clearStack (); } static void ocamlInitialize (const langType language) { Lang_Ocaml = language; initOperatorTable (); initKeywordHash (); } extern parserDefinition *OcamlParser (void) { static const char *const extensions[] = { "ml", "mli", NULL }; parserDefinition *def = parserNew ("OCaml"); def->kinds = OcamlKinds; def->kindCount = KIND_COUNT (OcamlKinds); def->extensions = extensions; def->parser = findOcamlTags; def->initialize = ocamlInitialize; return def; }