cosmopolitan/third_party/ctags/ocaml.c
2022-11-13 13:26:28 -08:00

1895 lines
39 KiB
C

// clang-format off
/*
* Copyright (c) 2009, Vincent Berthoux
*
* This source code is released for free distribution under the terms of the
* GNU General Public License.
*
* This module contains functions for generating tags for Objective Caml
* language files.
*/
/*
* INCLUDE FILES
*/
#include "third_party/ctags/general.h" /* must always come first */
#include "libc/mem/alg.h"
#include "libc/str/str.h"
#include "third_party/ctags/keyword.h"
#include "third_party/ctags/entry.h"
#include "third_party/ctags/options.h"
#include "third_party/ctags/read.h"
#include "third_party/ctags/routines.h"
#include "third_party/ctags/vstring.h"
/* To get rid of unused parameter warning in
* -Wextra */
#ifdef UNUSED
#elif defined(__GNUC__)
# define UNUSED(x) UNUSED_ ## x __attribute__((unused))
#elif defined(__LCLINT__)
# define UNUSED(x) /*@unused@*/ x
#else
# define UNUSED(x) x
#endif
#define OCAML_MAX_STACK_SIZE 256
typedef enum {
K_CLASS, /* Ocaml class, relatively rare */
K_METHOD, /* class method */
K_MODULE, /* Ocaml module OR functor */
K_VAR,
K_TYPE, /* name of an OCaml type */
K_FUNCTION,
K_CONSTRUCTOR, /* Constructor of a sum type */
K_RECORDFIELD,
K_EXCEPTION
} ocamlKind;
static kindOption OcamlKinds[] = {
{TRUE, 'c', "class", "classes"},
{TRUE, 'm', "method", "Object's method"},
{TRUE, 'M', "module", "Module or functor"},
{TRUE, 'v', "var", "Global variable"},
{TRUE, 't', "type", "Type name"},
{TRUE, 'f', "function", "A function"},
{TRUE, 'C', "Constructor", "A constructor"},
{TRUE, 'r', "Record field", "A 'structure' field"},
{TRUE, 'e', "Exception", "An exception"}
};
typedef enum {
OcaKEYWORD_and,
OcaKEYWORD_begin,
OcaKEYWORD_class,
OcaKEYWORD_do,
OcaKEYWORD_done,
OcaKEYWORD_else,
OcaKEYWORD_end,
OcaKEYWORD_exception,
OcaKEYWORD_for,
OcaKEYWORD_functor,
OcaKEYWORD_fun,
OcaKEYWORD_if,
OcaKEYWORD_in,
OcaKEYWORD_let,
OcaKEYWORD_value,
OcaKEYWORD_match,
OcaKEYWORD_method,
OcaKEYWORD_module,
OcaKEYWORD_mutable,
OcaKEYWORD_object,
OcaKEYWORD_of,
OcaKEYWORD_rec,
OcaKEYWORD_sig,
OcaKEYWORD_struct,
OcaKEYWORD_then,
OcaKEYWORD_try,
OcaKEYWORD_type,
OcaKEYWORD_val,
OcaKEYWORD_virtual,
OcaKEYWORD_while,
OcaKEYWORD_with,
OcaIDENTIFIER,
Tok_PARL, /* '(' */
Tok_PARR, /* ')' */
Tok_BRL, /* '[' */
Tok_BRR, /* ']' */
Tok_CurlL, /* '{' */
Tok_CurlR, /* '}' */
Tok_Prime, /* '\'' */
Tok_Pipe, /* '|' */
Tok_EQ, /* '=' */
Tok_Val, /* string/number/poo */
Tok_Op, /* any operator recognized by the language */
Tok_semi, /* ';' */
Tok_comma, /* ',' */
Tok_To, /* '->' */
Tok_Sharp, /* '#' */
Tok_Backslash, /* '\\' */
Tok_EOF /* END of file */
} ocamlKeyword;
typedef struct sOcaKeywordDesc {
const char *name;
ocamlKeyword id;
} ocaKeywordDesc;
typedef ocamlKeyword ocaToken;
static const ocaKeywordDesc OcamlKeywordTable[] = {
{ "and" , OcaKEYWORD_and },
{ "begin" , OcaKEYWORD_begin },
{ "class" , OcaKEYWORD_class },
{ "do" , OcaKEYWORD_do },
{ "done" , OcaKEYWORD_done },
{ "else" , OcaKEYWORD_else },
{ "end" , OcaKEYWORD_end },
{ "exception" , OcaKEYWORD_exception },
{ "for" , OcaKEYWORD_for },
{ "fun" , OcaKEYWORD_fun },
{ "function" , OcaKEYWORD_fun },
{ "functor" , OcaKEYWORD_functor },
{ "in" , OcaKEYWORD_in },
{ "let" , OcaKEYWORD_let },
{ "match" , OcaKEYWORD_match },
{ "method" , OcaKEYWORD_method },
{ "module" , OcaKEYWORD_module },
{ "mutable" , OcaKEYWORD_mutable },
{ "object" , OcaKEYWORD_object },
{ "of" , OcaKEYWORD_of },
{ "rec" , OcaKEYWORD_rec },
{ "sig" , OcaKEYWORD_sig },
{ "struct" , OcaKEYWORD_struct },
{ "then" , OcaKEYWORD_then },
{ "try" , OcaKEYWORD_try },
{ "type" , OcaKEYWORD_type },
{ "val" , OcaKEYWORD_val },
{ "value" , OcaKEYWORD_value }, /* just to handle revised syntax */
{ "virtual" , OcaKEYWORD_virtual },
{ "while" , OcaKEYWORD_while },
{ "with" , OcaKEYWORD_with },
{ "or" , Tok_Op },
{ "mod " , Tok_Op },
{ "land " , Tok_Op },
{ "lor " , Tok_Op },
{ "lxor " , Tok_Op },
{ "lsl " , Tok_Op },
{ "lsr " , Tok_Op },
{ "asr" , Tok_Op },
{ "->" , Tok_To },
{ "true" , Tok_Val },
{ "false" , Tok_Val }
};
static langType Lang_Ocaml;
boolean exportLocalInfo = FALSE;
/*//////////////////////////////////////////////////////////////////
//// lexingInit */
typedef struct _lexingState {
vString *name; /* current parsed identifier/operator */
const unsigned char *cp; /* position in stream */
} lexingState;
/* array of the size of all possible value for a char */
boolean isOperator[1 << (8 * sizeof (char))] = { FALSE };
static void initKeywordHash ( void )
{
const size_t count = sizeof (OcamlKeywordTable) / sizeof (ocaKeywordDesc);
size_t i;
for (i = 0; i < count; ++i)
{
addKeyword (OcamlKeywordTable[i].name, Lang_Ocaml,
(int) OcamlKeywordTable[i].id);
}
}
/* definition of all the operator in OCaml,
* /!\ certain operator get special treatment
* in regards of their role in OCaml grammar :
* '|' ':' '=' '~' and '?' */
static void initOperatorTable ( void )
{
isOperator['!'] = TRUE;
isOperator['$'] = TRUE;
isOperator['%'] = TRUE;
isOperator['&'] = TRUE;
isOperator['*'] = TRUE;
isOperator['+'] = TRUE;
isOperator['-'] = TRUE;
isOperator['.'] = TRUE;
isOperator['/'] = TRUE;
isOperator[':'] = TRUE;
isOperator['<'] = TRUE;
isOperator['='] = TRUE;
isOperator['>'] = TRUE;
isOperator['?'] = TRUE;
isOperator['@'] = TRUE;
isOperator['^'] = TRUE;
isOperator['~'] = TRUE;
isOperator['|'] = TRUE;
}
/*//////////////////////////////////////////////////////////////////////
//// Lexing */
static boolean isNum (char c)
{
return c >= '0' && c <= '9';
}
static boolean isLowerAlpha (char c)
{
return c >= 'a' && c <= 'z';
}
static boolean isUpperAlpha (char c)
{
return c >= 'A' && c <= 'Z';
}
static boolean isAlpha (char c)
{
return isLowerAlpha (c) || isUpperAlpha (c);
}
static boolean isIdent (char c)
{
return isNum (c) || isAlpha (c) || c == '_' || c == '\'';
}
static boolean isSpace (char c)
{
return c == ' ' || c == '\t' || c == '\r' || c == '\n';
}
static void eatWhiteSpace (lexingState * st)
{
const unsigned char *cp = st->cp;
while (isSpace (*cp))
cp++;
st->cp = cp;
}
static void eatString (lexingState * st)
{
boolean lastIsBackSlash = FALSE;
boolean unfinished = TRUE;
const unsigned char *c = st->cp + 1;
while (unfinished)
{
/* end of line should never happen.
* we tolerate it */
if (c == NULL || c[0] == '\0')
break;
else if (*c == '"' && !lastIsBackSlash)
unfinished = FALSE;
else
lastIsBackSlash = *c == '\\';
c++;
}
st->cp = c;
}
static void eatComment (lexingState * st)
{
boolean unfinished = TRUE;
boolean lastIsStar = FALSE;
const unsigned char *c = st->cp + 2;
while (unfinished)
{
/* we've reached the end of the line..
* so we have to reload a line... */
if (c == NULL || *c == '\0')
{
st->cp = fileReadLine ();
/* WOOPS... no more input...
* we return, next lexing read
* will be null and ok */
if (st->cp == NULL)
return;
c = st->cp;
}
/* we've reached the end of the comment */
else if (*c == ')' && lastIsStar)
unfinished = FALSE;
/* here we deal with imbricated comment, which
* are allowed in OCaml */
else if (c[0] == '(' && c[1] == '*')
{
st->cp = c;
eatComment (st);
c = st->cp;
if (c == NULL)
return;
lastIsStar = FALSE;
c++;
}
/* OCaml has a rule which says :
*
* "Comments do not occur inside string or character literals.
* Nested comments are handled correctly."
*
* So if we encounter a string beginning, we must parse it to
* get a good comment nesting (bug ID: 3117537)
*/
else if (*c == '"')
{
st->cp = c;
eatString (st);
c = st->cp;
}
else
{
lastIsStar = '*' == *c;
c++;
}
}
st->cp = c;
}
static void readIdentifier (lexingState * st)
{
const unsigned char *p;
vStringClear (st->name);
/* first char is a simple letter */
if (isAlpha (*st->cp) || *st->cp == '_')
vStringPut (st->name, (int) *st->cp);
/* Go till you get identifier chars */
for (p = st->cp + 1; isIdent (*p); p++)
vStringPut (st->name, (int) *p);
st->cp = p;
vStringTerminate (st->name);
}
static ocamlKeyword eatNumber (lexingState * st)
{
while (isNum (*st->cp))
st->cp++;
return Tok_Val;
}
/* Operator can be defined in OCaml as a function
* so we must be ample enough to parse them normally */
static ocamlKeyword eatOperator (lexingState * st)
{
int count = 0;
const unsigned char *root = st->cp;
vStringClear (st->name);
while (isOperator[st->cp[count]])
{
vStringPut (st->name, st->cp[count]);
count++;
}
vStringTerminate (st->name);
st->cp += count;
if (count <= 1)
{
switch (root[0])
{
case '|':
return Tok_Pipe;
case '=':
return Tok_EQ;
default:
return Tok_Op;
}
}
else if (count == 2 && root[0] == '-' && root[1] == '>')
return Tok_To;
else
return Tok_Op;
}
/* The lexer is in charge of reading the file.
* Some of sub-lexer (like eatComment) also read file.
* lexing is finished when the lexer return Tok_EOF */
static ocamlKeyword lex (lexingState * st)
{
int retType;
/* handling data input here */
while (st->cp == NULL || st->cp[0] == '\0')
{
st->cp = fileReadLine ();
if (st->cp == NULL)
return Tok_EOF;
}
if (isAlpha (*st->cp))
{
readIdentifier (st);
retType = lookupKeyword (vStringValue (st->name), Lang_Ocaml);
if (retType == -1) /* If it's not a keyword */
{
return OcaIDENTIFIER;
}
else
{
return retType;
}
}
else if (isNum (*st->cp))
return eatNumber (st);
else if (isSpace (*st->cp))
{
eatWhiteSpace (st);
return lex (st);
}
/* OCaml permit the definition of our own operators
* so here we check all the consecuting chars which
* are operators to discard them. */
else if (isOperator[*st->cp])
return eatOperator (st);
else
switch (*st->cp)
{
case '(':
if (st->cp[1] == '*') /* ergl, a comment */
{
eatComment (st);
return lex (st);
}
else
{
st->cp++;
return Tok_PARL;
}
case ')':
st->cp++;
return Tok_PARR;
case '[':
st->cp++;
return Tok_BRL;
case ']':
st->cp++;
return Tok_BRR;
case '{':
st->cp++;
return Tok_CurlL;
case '}':
st->cp++;
return Tok_CurlR;
case '\'':
st->cp++;
return Tok_Prime;
case ',':
st->cp++;
return Tok_comma;
case '=':
st->cp++;
return Tok_EQ;
case ';':
st->cp++;
return Tok_semi;
case '"':
eatString (st);
return Tok_Val;
case '_':
st->cp++;
return Tok_Val;
case '#':
st->cp++;
return Tok_Sharp;
case '\\':
st->cp++;
return Tok_Backslash;
default:
st->cp++;
break;
}
/* default return if nothing is recognized,
* shouldn't happen, but at least, it will
* be handled without destroying the parsing. */
return Tok_Val;
}
/*//////////////////////////////////////////////////////////////////////
//// Parsing */
typedef void (*parseNext) (vString * const ident, ocaToken what);
/********** Helpers */
/* This variable hold the 'parser' which is going to
* handle the next token */
static parseNext toDoNext;
/* Special variable used by parser eater to
* determine which action to put after their
* job is finished. */
static parseNext comeAfter;
/* If a token put an end to current delcaration/
* statement */
static ocaToken terminatingToken;
/* Token to be searched by the different
* parser eater. */
static ocaToken waitedToken;
/* name of the last class, used for
* context stacking. */
static vString *lastClass;
static vString *voidName;
typedef enum _sContextKind {
ContextStrong,
ContextSoft
} contextKind;
typedef enum _sContextType {
ContextType,
ContextModule,
ContextClass,
ContextValue,
ContextFunction,
ContextMethod,
ContextBlock
} contextType;
typedef struct _sOcamlContext {
contextKind kind; /* well if the context is strong or not */
contextType type;
parseNext callback; /* what to do when a context is pop'd */
vString *contextName; /* name, if any, of the surrounding context */
} ocamlContext;
/* context stack, can be used to output scope information
* into the tag file. */
static ocamlContext stack[OCAML_MAX_STACK_SIZE];
/* current position in the tag */
static int stackIndex;
/* special function, often recalled, so putting it here */
static void globalScope (vString * const ident, ocaToken what);
/* Return : index of the last named context if one
* is found, -1 otherwise */
static int getLastNamedIndex ( void )
{
int i;
for (i = stackIndex - 1; i >= 0; --i)
{
if (vStringLength (stack[i].contextName) > 0)
{
return i;
}
}
return -1;
}
static const char *contextDescription (contextType t)
{
switch (t)
{
case ContextFunction:
return "function";
case ContextMethod:
return "method";
case ContextValue:
return "value";
case ContextModule:
return "Module";
case ContextType:
return "type";
case ContextClass:
return "class";
case ContextBlock:
return "begin/end";
}
return NULL;
}
static char contextTypeSuffix (contextType t)
{
switch (t)
{
case ContextFunction:
case ContextMethod:
case ContextValue:
case ContextModule:
return '/';
case ContextType:
return '.';
case ContextClass:
return '#';
case ContextBlock:
return ' ';
}
return '$';
}
/* Push a new context, handle null string */
static void pushContext (contextKind kind, contextType type, parseNext after,
vString const *contextName)
{
int parentIndex;
if (stackIndex >= OCAML_MAX_STACK_SIZE)
{
verbose ("OCaml Maximum depth reached");
return;
}
stack[stackIndex].kind = kind;
stack[stackIndex].type = type;
stack[stackIndex].callback = after;
parentIndex = getLastNamedIndex ();
if (contextName == NULL)
{
vStringClear (stack[stackIndex++].contextName);
return;
}
if (parentIndex >= 0)
{
vStringCopy (stack[stackIndex].contextName,
stack[parentIndex].contextName);
vStringPut (stack[stackIndex].contextName,
contextTypeSuffix (stack[parentIndex].type));
vStringCat (stack[stackIndex].contextName, contextName);
}
else
vStringCopy (stack[stackIndex].contextName, contextName);
stackIndex++;
}
static void pushStrongContext (vString * name, contextType type)
{
pushContext (ContextStrong, type, &globalScope, name);
}
static void pushSoftContext (parseNext continuation,
vString * name, contextType type)
{
pushContext (ContextSoft, type, continuation, name);
}
static void pushEmptyContext (parseNext continuation)
{
pushContext (ContextSoft, ContextValue, continuation, NULL);
}
/* unroll the stack until the last named context.
* then discard it. Used to handle the :
* let f x y = ...
* in ...
* where the context is reseted after the in. Context may have
* been really nested before that. */
static void popLastNamed ( void )
{
int i = getLastNamedIndex ();
if (i >= 0)
{
stackIndex = i;
toDoNext = stack[i].callback;
vStringClear (stack[i].contextName);
}
else
{
/* ok, no named context found...
* (should not happen). */
stackIndex = 0;
toDoNext = &globalScope;
}
}
/* pop a context without regarding it's content
* (beside handling empty stack case) */
static void popSoftContext ( void )
{
if (stackIndex <= 0)
{
toDoNext = &globalScope;
}
else
{
stackIndex--;
toDoNext = stack[stackIndex].callback;
vStringClear (stack[stackIndex].contextName);
}
}
/* Reset everything until the last global space.
* a strong context can be :
* - module
* - class definition
* - the initial global space
* - a _global_ delcaration (let at global scope or in a module).
* Created to exit quickly deeply nested context */
static contextType popStrongContext ( void )
{
int i;
for (i = stackIndex - 1; i >= 0; --i)
{
if (stack[i].kind == ContextStrong)
{
stackIndex = i;
toDoNext = stack[i].callback;
vStringClear (stack[i].contextName);
return stack[i].type;
}
}
/* ok, no strong context found... */
stackIndex = 0;
toDoNext = &globalScope;
return -1;
}
/* Ignore everything till waitedToken and jump to comeAfter.
* If the "end" keyword is encountered break, doesn't remember
* why though. */
static void tillToken (vString * const UNUSED (ident), ocaToken what)
{
if (what == waitedToken)
toDoNext = comeAfter;
else if (what == OcaKEYWORD_end)
{
popStrongContext ();
toDoNext = &globalScope;
}
}
/* Ignore everything till a waitedToken is seen, but
* take care of balanced parentheses/bracket use */
static void contextualTillToken (vString * const UNUSED (ident), ocaToken what)
{
static int parentheses = 0;
static int bracket = 0;
static int curly = 0;
switch (what)
{
case Tok_PARL:
parentheses--;
break;
case Tok_PARR:
parentheses++;
break;
case Tok_CurlL:
curly--;
break;
case Tok_CurlR:
curly++;
break;
case Tok_BRL:
bracket--;
break;
case Tok_BRR:
bracket++;
break;
default: /* other token are ignored */
break;
}
if (what == waitedToken && parentheses == 0 && bracket == 0 && curly == 0)
toDoNext = comeAfter;
else if (what == OcaKEYWORD_end)
{
popStrongContext ();
toDoNext = &globalScope;
}
}
/* Wait for waitedToken and jump to comeAfter or let
* the globalScope handle declarations */
static void tillTokenOrFallback (vString * const ident, ocaToken what)
{
if (what == waitedToken)
toDoNext = comeAfter;
else
globalScope (ident, what);
}
/* ignore token till waitedToken, or give up if find
* terminatingToken. Use globalScope to handle new
* declarations. */
static void tillTokenOrTerminatingOrFallback (vString * const ident,
ocaToken what)
{
if (what == waitedToken)
toDoNext = comeAfter;
else if (what == terminatingToken)
toDoNext = globalScope;
else
globalScope (ident, what);
}
/* ignore the next token in the stream and jump to the
* given comeAfter state */
static void ignoreToken (vString * const UNUSED (ident), ocaToken UNUSED (what))
{
toDoNext = comeAfter;
}
/********** Grammar */
/* the purpose of each function is detailled near their
* implementation */
static void killCurrentState ( void )
{
/* Tracking the kind of previous strong
* context, if it doesn't match with a
* really strong entity, repop */
switch (popStrongContext ())
{
case ContextValue:
popStrongContext ();
break;
case ContextFunction:
popStrongContext ();
break;
case ContextMethod:
popStrongContext ();
break;
case ContextType:
popStrongContext();
break;
case ContextBlock:
break;
case ContextModule:
break;
case ContextClass:
break;
default:
/* nothing more */
break;
}
}
/* used to prepare tag for OCaml, just in case their is a need to
* add additional information to the tag. */
static void prepareTag (tagEntryInfo * tag, vString const *name, ocamlKind kind)
{
int parentIndex;
initTagEntry (tag, vStringValue (name));
tag->kindName = OcamlKinds[kind].name;
tag->kind = OcamlKinds[kind].letter;
if (kind == K_MODULE)
{
tag->lineNumberEntry = TRUE;
tag->lineNumber = 1;
}
parentIndex = getLastNamedIndex ();
if (parentIndex >= 0)
{
tag->extensionFields.scope[0] =
contextDescription (stack[parentIndex].type);
tag->extensionFields.scope[1] =
vStringValue (stack[parentIndex].contextName);
}
}
/* Used to centralise tag creation, and be able to add
* more information to it in the future */
static void addTag (vString * const ident, int kind)
{
if (OcamlKinds [kind].enabled && ident != NULL && vStringLength (ident) > 0)
{
tagEntryInfo toCreate;
prepareTag (&toCreate, ident, kind);
makeTagEntry (&toCreate);
}
}
boolean needStrongPoping = FALSE;
static void requestStrongPoping ( void )
{
needStrongPoping = TRUE;
}
static void cleanupPreviousParser ( void )
{
if (needStrongPoping)
{
needStrongPoping = FALSE;
popStrongContext ();
}
}
/* Due to some circular dependencies, the following functions
* must be forward-declared. */
static void letParam (vString * const ident, ocaToken what);
static void localScope (vString * const ident, ocaToken what);
static void mayRedeclare (vString * const ident, ocaToken what);
static void typeSpecification (vString * const ident, ocaToken what);
/*
* Parse a record type
* type ident = // parsed previously
* {
* ident1: type1;
* ident2: type2;
* }
*/
static void typeRecord (vString * const ident, ocaToken what)
{
switch (what)
{
case OcaIDENTIFIER:
addTag (ident, K_RECORDFIELD);
terminatingToken = Tok_CurlR;
waitedToken = Tok_semi;
comeAfter = &typeRecord;
toDoNext = &tillTokenOrTerminatingOrFallback;
break;
case OcaKEYWORD_mutable:
/* ignore it */
break;
case Tok_CurlR:
popStrongContext ();
toDoNext = &globalScope;
break;
default: /* don't care */
break;
}
}
/* handle :
* exception ExceptionName of ... */
static void exceptionDecl (vString * const ident, ocaToken what)
{
if (what == OcaIDENTIFIER)
{
addTag (ident, K_EXCEPTION);
}
else /* probably ill-formed, give back to global scope */
{
globalScope (ident, what);
}
toDoNext = &globalScope;
}
tagEntryInfo tempTag;
vString *tempIdent;
/* Ensure a constructor is not a type path beginning
* with a module */
static void constructorValidation (vString * const ident, ocaToken what)
{
switch (what)
{
case Tok_Op: /* if we got a '.' which is an operator */
toDoNext = &globalScope;
popStrongContext ();
needStrongPoping = FALSE;
break;
case OcaKEYWORD_of: /* OK, it must be a constructor :) */
makeTagEntry (&tempTag);
vStringClear (tempIdent);
toDoNext = &tillTokenOrFallback;
comeAfter = &typeSpecification;
waitedToken = Tok_Pipe;
break;
case Tok_Pipe: /* OK, it was a constructor :) */
makeTagEntry (&tempTag);
vStringClear (tempIdent);
toDoNext = &typeSpecification;
break;
default: /* and mean that we're not facing a module name */
makeTagEntry (&tempTag);
vStringClear (tempIdent);
toDoNext = &tillTokenOrFallback;
comeAfter = &typeSpecification;
waitedToken = Tok_Pipe;
/* nothing in the context, discard it */
popStrongContext ();
/* to be sure we use this token */
globalScope (ident, what);
}
}
/* Parse beginning of type definition
* type 'avar ident =
* or
* type ('var1, 'var2) ident =
*/
static void typeDecl (vString * const ident, ocaToken what)
{
switch (what)
{
/* parameterized */
case Tok_Prime:
comeAfter = &typeDecl;
toDoNext = &ignoreToken;
break;
/* LOTS of parameters */
case Tok_PARL:
comeAfter = &typeDecl;
waitedToken = Tok_PARR;
toDoNext = &tillToken;
break;
case OcaIDENTIFIER:
addTag (ident, K_TYPE);
pushStrongContext (ident, ContextType);
requestStrongPoping ();
waitedToken = Tok_EQ;
comeAfter = &typeSpecification;
toDoNext = &tillTokenOrFallback;
break;
default:
globalScope (ident, what);
}
}
/* Parse type of kind
* type bidule = Ctor1 of ...
* | Ctor2
* | Ctor3 of ...
* or
* type bidule = | Ctor1 of ... | Ctor2
*
* when type bidule = { ... } is detected,
* let typeRecord handle it. */
static void typeSpecification (vString * const ident, ocaToken what)
{
switch (what)
{
case OcaIDENTIFIER:
if (isUpperAlpha (ident->buffer[0]))
{
/* here we handle type aliases of type
* type foo = AnotherModule.bar
* AnotherModule can mistakenly be took
* for a constructor. */
vStringCopy (tempIdent, ident);
prepareTag (&tempTag, tempIdent, K_CONSTRUCTOR);
toDoNext = &constructorValidation;
}
else
{
toDoNext = &tillTokenOrFallback;
comeAfter = &typeSpecification;
waitedToken = Tok_Pipe;
}
break;
case OcaKEYWORD_and:
toDoNext = &typeDecl;
break;
case Tok_BRL: /* the '[' & ']' are ignored to accommodate */
case Tok_BRR: /* with the revised syntax */
case Tok_Pipe:
/* just ignore it */
break;
case Tok_CurlL:
toDoNext = &typeRecord;
break;
default: /* don't care */
break;
}
}
static boolean dirtySpecialParam = FALSE;
/* parse the ~label and ~label:type parameter */
static void parseLabel (vString * const ident, ocaToken what)
{
static int parCount = 0;
switch (what)
{
case OcaIDENTIFIER:
if (!dirtySpecialParam)
{
if (exportLocalInfo)
addTag (ident, K_VAR);
dirtySpecialParam = TRUE;
}
break;
case Tok_PARL:
parCount++;
break;
case Tok_PARR:
parCount--;
if (parCount == 0)
toDoNext = &letParam;
break;
case Tok_Op:
if (ident->buffer[0] == ':')
{
toDoNext = &ignoreToken;
comeAfter = &letParam;
}
else if (parCount == 0 && dirtySpecialParam)
{
toDoNext = &letParam;
letParam (ident, what);
}
break;
default:
if (parCount == 0 && dirtySpecialParam)
{
toDoNext = &letParam;
letParam (ident, what);
}
break;
}
}
/* Optional argument with syntax like this :
* ?(foo = value) */
static void parseOptionnal (vString * const ident, ocaToken what)
{
static int parCount = 0;
switch (what)
{
case OcaIDENTIFIER:
if (!dirtySpecialParam)
{
if (exportLocalInfo)
addTag (ident, K_VAR);
dirtySpecialParam = TRUE;
if (parCount == 0)
toDoNext = &letParam;
}
break;
case Tok_PARL:
parCount++;
break;
case Tok_PARR:
parCount--;
if (parCount == 0)
toDoNext = &letParam;
break;
default: /* don't care */
break;
}
}
/** handle let inside functions (so like it's name
* say : local let */
static void localLet (vString * const ident, ocaToken what)
{
switch (what)
{
case Tok_PARL:
/* We ignore this token to be able to parse such
* declarations :
* let (ident : type) = ...
*/
break;
case OcaKEYWORD_rec:
/* just ignore to be able to parse such declarations:
* let rec ident = ... */
break;
case Tok_Op:
/* we are defining a new operator, it's a
* function definition */
if (exportLocalInfo)
addTag (ident, K_FUNCTION);
pushSoftContext (mayRedeclare, ident, ContextFunction);
toDoNext = &letParam;
break;
/* Can be a weiiird binding, or an '_' */
case Tok_Val:
if (exportLocalInfo)
addTag (ident, K_VAR);
pushSoftContext (mayRedeclare, ident, ContextValue);
toDoNext = &letParam;
break;
case OcaIDENTIFIER:
if (exportLocalInfo)
addTag (ident, K_VAR);
pushSoftContext (mayRedeclare, ident, ContextValue);
toDoNext = &letParam;
break;
case OcaKEYWORD_end:
popStrongContext ();
break;
default:
toDoNext = &localScope;
break;
}
}
/* parse :
* | pattern pattern -> ...
* or
* pattern apttern apttern -> ...
* we ignore all identifiers declared in the pattern,
* because their scope is likely to be even more limited
* than the let definitions.
* Used after a match ... with, or a function ... or fun ...
* because their syntax is similar. */
static void matchPattern (vString * const ident, ocaToken what)
{
/* keep track of [], as it
* can be used in patterns and can
* mean the end of match expression in
* revised syntax */
static int braceCount = 0;
switch (what)
{
case Tok_To:
pushEmptyContext (&matchPattern);
toDoNext = &mayRedeclare;
break;
case Tok_BRL:
braceCount++;
break;
case OcaKEYWORD_value:
popLastNamed ();
globalScope (ident, what);
break;
case OcaKEYWORD_in:
popLastNamed ();
break;
default:
break;
}
}
/* Used at the beginning of a new scope (begin of a
* definition, parenthesis...) to catch inner let
* definition that may be in. */
static void mayRedeclare (vString * const ident, ocaToken what)
{
switch (what)
{
case OcaKEYWORD_value:
// let globalScope handle it
globalScope (ident, what);
break;
case OcaKEYWORD_let:
case OcaKEYWORD_val:
toDoNext = localLet;
break;
case OcaKEYWORD_object:
vStringClear (lastClass);
pushContext (ContextStrong, ContextClass,
&localScope, NULL /*voidName */ );
needStrongPoping = FALSE;
toDoNext = &globalScope;
break;
case OcaKEYWORD_for:
case OcaKEYWORD_while:
toDoNext = &tillToken;
waitedToken = OcaKEYWORD_do;
comeAfter = &mayRedeclare;
break;
case OcaKEYWORD_try:
toDoNext = &mayRedeclare;
pushSoftContext (matchPattern, ident, ContextFunction);
break;
case OcaKEYWORD_fun:
toDoNext = &matchPattern;
break;
/* Handle the special ;; from the OCaml
* Top level */
case Tok_semi:
default:
toDoNext = &localScope;
localScope (ident, what);
}
}
/* parse :
* p1 p2 ... pn = ...
* or
* ?(p1=v) p2 ~p3 ~pn:ja ... = ... */
static void letParam (vString * const ident, ocaToken what)
{
switch (what)
{
case Tok_EQ:
toDoNext = &mayRedeclare;
break;
case OcaIDENTIFIER:
if (exportLocalInfo)
addTag (ident, K_VAR);
break;
case Tok_Op:
switch (ident->buffer[0])
{
case ':':
/*popSoftContext(); */
/* we got a type signature */
comeAfter = &mayRedeclare;
toDoNext = &tillTokenOrFallback;
waitedToken = Tok_EQ;
break;
/* parse something like
* ~varname:type
* or
* ~varname
* or
* ~(varname: long type) */
case '~':
toDoNext = &parseLabel;
dirtySpecialParam = FALSE;
break;
/* Optional argument with syntax like this :
* ?(bla = value)
* or
* ?bla */
case '?':
toDoNext = &parseOptionnal;
dirtySpecialParam = FALSE;
break;
default:
break;
}
break;
default: /* don't care */
break;
}
}
/* parse object ...
* used to be sure the class definition is not a type
* alias */
static void classSpecif (vString * const UNUSED (ident), ocaToken what)
{
switch (what)
{
case OcaKEYWORD_object:
pushStrongContext (lastClass, ContextClass);
toDoNext = &globalScope;
break;
default:
vStringClear (lastClass);
toDoNext = &globalScope;
}
}
/* Handle a method ... class declaration.
* nearly a copy/paste of globalLet. */
static void methodDecl (vString * const ident, ocaToken what)
{
switch (what)
{
case Tok_PARL:
/* We ignore this token to be able to parse such
* declarations :
* let (ident : type) = ... */
break;
case OcaKEYWORD_mutable:
case OcaKEYWORD_virtual:
case OcaKEYWORD_rec:
/* just ignore to be able to parse such declarations:
* let rec ident = ... */
break;
case OcaIDENTIFIER:
addTag (ident, K_METHOD);
/* Normal pushing to get good subs */
pushStrongContext (ident, ContextMethod);
/*pushSoftContext( globalScope, ident, ContextMethod ); */
toDoNext = &letParam;
break;
case OcaKEYWORD_end:
popStrongContext ();
break;
default:
toDoNext = &globalScope;
break;
}
}
/* name of the last module, used for
* context stacking. */
vString *lastModule;
/* parse
* ... struct (* new global scope *) end
* or
* ... sig (* new global scope *) end
* or
* functor ... -> moduleSpecif
*/
static void moduleSpecif (vString * const ident, ocaToken what)
{
switch (what)
{
case OcaKEYWORD_functor:
toDoNext = &contextualTillToken;
waitedToken = Tok_To;
comeAfter = &moduleSpecif;
break;
case OcaKEYWORD_struct:
case OcaKEYWORD_sig:
pushStrongContext (lastModule, ContextModule);
toDoNext = &globalScope;
break;
case Tok_PARL: /* ( */
toDoNext = &contextualTillToken;
comeAfter = &globalScope;
waitedToken = Tok_PARR;
contextualTillToken (ident, what);
break;
default:
vStringClear (lastModule);
toDoNext = &globalScope;
}
}
/* parse :
* module name = ...
* then pass the token stream to moduleSpecif */
static void moduleDecl (vString * const ident, ocaToken what)
{
switch (what)
{
case OcaKEYWORD_type:
/* just ignore it, name come after */
break;
case OcaIDENTIFIER:
addTag (ident, K_MODULE);
vStringCopy (lastModule, ident);
waitedToken = Tok_EQ;
comeAfter = &moduleSpecif;
toDoNext = &contextualTillToken;
break;
default: /* don't care */
break;
}
}
/* parse :
* class name = ...
* or
* class virtual ['a,'b] classname = ... */
static void classDecl (vString * const ident, ocaToken what)
{
switch (what)
{
case OcaIDENTIFIER:
addTag (ident, K_CLASS);
vStringCopy (lastClass, ident);
toDoNext = &contextualTillToken;
waitedToken = Tok_EQ;
comeAfter = &classSpecif;
break;
case Tok_BRL:
toDoNext = &tillToken;
waitedToken = Tok_BRR;
comeAfter = &classDecl;
break;
default:
break;
}
}
/* Handle a global
* let ident ...
* or
* let rec ident ... */
static void globalLet (vString * const ident, ocaToken what)
{
switch (what)
{
case Tok_PARL:
/* We ignore this token to be able to parse such
* declarations :
* let (ident : type) = ...
*/
break;
case OcaKEYWORD_mutable:
case OcaKEYWORD_virtual:
case OcaKEYWORD_rec:
/* just ignore to be able to parse such declarations:
* let rec ident = ... */
break;
case Tok_Op:
/* we are defining a new operator, it's a
* function definition */
addTag (ident, K_FUNCTION);
pushStrongContext (ident, ContextFunction);
toDoNext = &letParam;
break;
case OcaIDENTIFIER:
addTag (ident, K_VAR);
pushStrongContext (ident, ContextValue);
requestStrongPoping ();
toDoNext = &letParam;
break;
case OcaKEYWORD_end:
popStrongContext ();
break;
default:
toDoNext = &globalScope;
break;
}
}
/* Handle the "strong" top levels, all 'big' declarations
* happen here */
static void globalScope (vString * const UNUSED (ident), ocaToken what)
{
/* Do not touch, this is used only by the global scope
* to handle an 'and' */
static parseNext previousParser = &globalScope;
switch (what)
{
case OcaKEYWORD_and:
cleanupPreviousParser ();
toDoNext = previousParser;
break;
case OcaKEYWORD_type:
cleanupPreviousParser ();
toDoNext = &typeDecl;
previousParser = &typeDecl;
break;
case OcaKEYWORD_class:
cleanupPreviousParser ();
toDoNext = &classDecl;
previousParser = &classDecl;
break;
case OcaKEYWORD_module:
cleanupPreviousParser ();
toDoNext = &moduleDecl;
previousParser = &moduleDecl;
break;
case OcaKEYWORD_end:
needStrongPoping = FALSE;
killCurrentState ();
/*popStrongContext(); */
break;
case OcaKEYWORD_method:
cleanupPreviousParser ();
toDoNext = &methodDecl;
/* and is not allowed in methods */
break;
/* val is mixed with let as global
* to be able to handle mli & new syntax */
case OcaKEYWORD_val:
case OcaKEYWORD_value:
case OcaKEYWORD_let:
cleanupPreviousParser ();
toDoNext = &globalLet;
previousParser = &globalLet;
break;
case OcaKEYWORD_exception:
cleanupPreviousParser ();
toDoNext = &exceptionDecl;
previousParser = &globalScope;
break;
/* must be a #line directive, discard the
* whole line. */
case Tok_Sharp:
/* ignore */
break;
default:
/* we don't care */
break;
}
}
/* Parse expression. Well ignore it is more the case,
* ignore all tokens except "shocking" keywords */
static void localScope (vString * const ident, ocaToken what)
{
switch (what)
{
case Tok_Pipe:
case Tok_PARR:
case Tok_BRR:
case Tok_CurlR:
popSoftContext ();
break;
/* Everything that `begin` has an `end`
* as end is overloaded and signal many end
* of things, we add an empty strong context to
* avoid problem with the end.
*/
case OcaKEYWORD_begin:
pushContext (ContextStrong, ContextBlock, &mayRedeclare, NULL);
toDoNext = &mayRedeclare;
break;
case OcaKEYWORD_in:
popLastNamed ();
break;
/* Ok, we got a '{', which is much likely to create
* a record. We cannot treat it like other [ && (,
* because it may contain the 'with' keyword and screw
* everything else. */
case Tok_CurlL:
toDoNext = &contextualTillToken;
waitedToken = Tok_CurlR;
comeAfter = &localScope;
contextualTillToken (ident, what);
break;
/* Yeah imperative feature of OCaml,
* a ';' like in C */
case Tok_semi:
toDoNext = &mayRedeclare;
break;
case Tok_PARL:
case Tok_BRL:
pushEmptyContext (&localScope);
toDoNext = &mayRedeclare;
break;
case OcaKEYWORD_and:
popLastNamed ();
toDoNext = &localLet;
break;
case OcaKEYWORD_else:
case OcaKEYWORD_then:
popSoftContext ();
pushEmptyContext (&localScope);
toDoNext = &mayRedeclare;
break;
case OcaKEYWORD_if:
pushEmptyContext (&localScope);
toDoNext = &mayRedeclare;
break;
case OcaKEYWORD_match:
pushEmptyContext (&localScope);
toDoNext = &mayRedeclare;
break;
case OcaKEYWORD_with:
popSoftContext ();
toDoNext = &matchPattern;
pushEmptyContext (&matchPattern);
break;
case OcaKEYWORD_end:
killCurrentState ();
break;
case OcaKEYWORD_fun:
comeAfter = &mayRedeclare;
toDoNext = &tillToken;
waitedToken = Tok_To;
break;
case OcaKEYWORD_done:
case OcaKEYWORD_val:
/* doesn't care */
break;
default:
requestStrongPoping ();
globalScope (ident, what);
break;
}
}
/*////////////////////////////////////////////////////////////////
//// Deal with the system */
/* in OCaml the file name is the module name used in the language
* with it first letter put in upper case */
static void computeModuleName ( void )
{
/* in Ocaml the file name define a module.
* so we define a module =)
*/
const char *filename = getSourceFileName ();
int beginIndex = 0;
int endIndex = strlen (filename) - 1;
vString *moduleName = vStringNew ();
while (filename[endIndex] != '.' && endIndex > 0)
endIndex--;
/* avoid problem with path in front of filename */
beginIndex = endIndex;
while (beginIndex > 0)
{
if (filename[beginIndex] == '\\' || filename[beginIndex] == '/')
{
beginIndex++;
break;
}
beginIndex--;
}
vStringNCopyS (moduleName, &filename[beginIndex], endIndex - beginIndex);
vStringTerminate (moduleName);
if (isLowerAlpha (moduleName->buffer[0]))
moduleName->buffer[0] += ('A' - 'a');
addTag (moduleName, K_MODULE);
vStringDelete (moduleName);
}
/* Allocate all string of the context stack */
static void initStack ( void )
{
int i;
for (i = 0; i < OCAML_MAX_STACK_SIZE; ++i)
stack[i].contextName = vStringNew ();
stackIndex = 0;
}
static void clearStack ( void )
{
int i;
for (i = 0; i < OCAML_MAX_STACK_SIZE; ++i)
vStringDelete (stack[i].contextName);
}
static void findOcamlTags (void)
{
vString *name = vStringNew ();
lexingState st;
ocaToken tok;
initStack ();
computeModuleName ();
tempIdent = vStringNew ();
lastModule = vStringNew ();
lastClass = vStringNew ();
voidName = vStringNew ();
vStringCopyS (voidName, "_");
st.name = vStringNew ();
st.cp = fileReadLine ();
toDoNext = &globalScope;
tok = lex (&st);
while (tok != Tok_EOF)
{
(*toDoNext) (st.name, tok);
tok = lex (&st);
}
vStringDelete (name);
vStringDelete (voidName);
vStringDelete (tempIdent);
vStringDelete (lastModule);
vStringDelete (lastClass);
clearStack ();
}
static void ocamlInitialize (const langType language)
{
Lang_Ocaml = language;
initOperatorTable ();
initKeywordHash ();
}
extern parserDefinition *OcamlParser (void)
{
static const char *const extensions[] = { "ml", "mli", NULL };
parserDefinition *def = parserNew ("OCaml");
def->kinds = OcamlKinds;
def->kindCount = KIND_COUNT (OcamlKinds);
def->extensions = extensions;
def->parser = findOcamlTags;
def->initialize = ocamlInitialize;
return def;
}