cosmopolitan/third_party/ctags/tex.c
2022-11-13 13:26:28 -08:00

524 lines
12 KiB
C

// clang-format off
/*
* $Id: tex.c 666 2008-05-15 17:47:31Z dfishburn $
*
* Copyright (c) 2008, David Fishburn
*
* This source code is released for free distribution under the terms of the
* GNU General Public License.
*
* This module contains functions for generating tags for TeX language files.
*
* Tex language reference:
* http://en.wikibooks.org/wiki/TeX#The_Structure_of_TeX
*/
/*
* INCLUDE FILES
*/
#include "third_party/ctags/general.h" /* must always come first */
#include "libc/str/str.h" /* to define isalpha () */
#include "libc/runtime/runtime.h"
#ifdef DEBUG
#include "libc/calls/calls.h"
#include "libc/calls/dprintf.h"
#include "libc/calls/weirdtypes.h"
#include "libc/fmt/fmt.h"
#include "libc/mem/fmt.h"
#include "libc/stdio/stdio.h"
#include "libc/stdio/temp.h"
#include "third_party/musl/tempnam.h"
#endif
#include "third_party/ctags/debug.h"
#include "third_party/ctags/entry.h"
#include "third_party/ctags/keyword.h"
#include "third_party/ctags/parse.h"
#include "third_party/ctags/read.h"
#include "third_party/ctags/routines.h"
#include "third_party/ctags/vstring.h"
/*
* MACROS
*/
#define isType(token,t) (boolean) ((token)->type == (t))
#define isKeyword(token,k) (boolean) ((token)->keyword == (k))
/*
* DATA DECLARATIONS
*/
/*
* Codes carried through the Exception jump buffer. ExceptionEOF is the value
* readToken passes to longjmp() at end of input; ExceptionNone (0) is the
* "no exception" value that findTexTags compares against.
*/
typedef enum eException { ExceptionNone, ExceptionEOF } exception_t;
/*
* Used to specify type of keyword.
*
* KEYWORD_NONE marks a token that is not one of the keywords listed in
* TexKeywordTable; newToken and readToken reset tokens to it. The remaining
* values are translated to texKind values by the switch in parseTexFile.
*/
typedef enum eKeywordId {
KEYWORD_NONE = -1,
KEYWORD_chapter,
KEYWORD_section,
KEYWORD_subsection,
KEYWORD_subsubsection,
KEYWORD_part,
KEYWORD_paragraph,
KEYWORD_subparagraph,
KEYWORD_include
} keywordId;
/* Used to determine whether keyword is valid for the token language and
* what its ID is.
*
* One entry of TexKeywordTable: the keyword text as it appears after the
* backslash, paired with the ID registered for it by buildTexKeywordHash.
*/
typedef struct sKeywordDesc {
const char *name; /* keyword spelling, without the leading backslash */
keywordId id; /* ID handed to addKeyword for this spelling */
} keywordDesc;
/*
* Classification assigned to a token by readToken.
*
* In the code visible in this file readToken never produces TOKEN_CHARACTER
* or TOKEN_QUESTION_MARK; the other values are set as noted below.
*/
typedef enum eTokenType {
TOKEN_UNDEFINED, /* reset value; also any character readToken does not recognise */
TOKEN_CHARACTER,
TOKEN_CLOSE_PAREN, /* ')' */
TOKEN_COMMA, /* ',' */
TOKEN_KEYWORD, /* backslash word found in the keyword table */
TOKEN_OPEN_PAREN, /* '(' */
TOKEN_IDENTIFIER, /* plain word, or backslash word that is not a keyword */
TOKEN_STRING, /* text enclosed in ' or " */
TOKEN_OPEN_CURLY, /* '{' */
TOKEN_CLOSE_CURLY, /* '}' */
TOKEN_OPEN_SQUARE, /* '[' */
TOKEN_CLOSE_SQUARE, /* ']' */
TOKEN_QUESTION_MARK,
TOKEN_STAR /* '*' */
} tokenType;
/*
* A single lexical token together with the source position recorded for it.
* Instances are created by newToken and released by deleteToken, which own
* the two vString members.
*/
typedef struct sTokenInfo {
tokenType type; /* classification set by readToken */
keywordId keyword; /* KEYWORD_NONE unless type is TOKEN_KEYWORD */
vString * string; /* token text (empty for punctuation tokens) */
vString * scope; /* when non-empty, makeTexTag prefixes it to the tag name with a '.' */
unsigned long lineNumber; /* value of getSourceLineNumber () when recorded */
fpos_t filePosition; /* value of getInputFilePosition () when recorded */
} tokenInfo;
/*
* DATA DEFINITIONS
*/
/*
* Language ID received by initialize (); passed to addKeyword and
* analyzeToken so keywords are registered and looked up for this parser.
* NOTE(review): the "js" in the name looks like a leftover from another
* parser -- within this file the variable holds the ID given to the Tex
* parser. Renaming would touch every user, so it is only flagged here.
*/
static langType Lang_js;
/* Jump buffer armed in findTexTags; readToken longjmp()s to it at EOF. */
static jmp_buf Exception;
/*
* Tag kinds produced by this parser. Each value is used as an index into
* TexKinds; TEXTAG_COUNT is the number of kinds and is checked against the
* size of TexKinds in initialize ().
*/
typedef enum {
TEXTAG_CHAPTER,
TEXTAG_SECTION,
TEXTAG_SUBSECTION,
TEXTAG_SUBSUBSECTION,
TEXTAG_PART,
TEXTAG_PARAGRAPH,
TEXTAG_SUBPARAGRAPH,
TEXTAG_INCLUDE,
TEXTAG_COUNT
} texKind;
/*
* Kind table handed to the parser definition in TexParser (), indexed by
* texKind (entries must stay in the same order as that enum). makeConstTag
* reads the "enabled", "letter" and "name" members of an entry; the last
* string of each initializer is presumably the human-readable description
* (its member name is not visible in this file).
*/
static kindOption TexKinds [] = {
{ TRUE, 'c', "chapter", "chapters" },
{ TRUE, 's', "section", "sections" },
{ TRUE, 'u', "subsection", "subsections" },
{ TRUE, 'b', "subsubsection", "subsubsections" },
{ TRUE, 'p', "part", "parts" },
{ TRUE, 'P', "paragraph", "paragraphs" },
{ TRUE, 'G', "subparagraph", "subparagraphs" },
{ TRUE, 'i', "include", "includes" }
};
/*
* Keywords recognised after a backslash. buildTexKeywordHash registers
* every entry with addKeyword; readToken later looks words up through
* analyzeToken to obtain the keyword ID.
*/
static const keywordDesc TexKeywordTable [] = {
/* keyword keyword ID */
{ "chapter", KEYWORD_chapter },
{ "section", KEYWORD_section },
{ "subsection", KEYWORD_subsection },
{ "subsubsection", KEYWORD_subsubsection },
{ "part", KEYWORD_part },
{ "paragraph", KEYWORD_paragraph },
{ "subparagraph", KEYWORD_subparagraph },
{ "include", KEYWORD_include }
};
/*
* FUNCTION DEFINITIONS
*/
static boolean isIdentChar (const int c)
{
return (boolean)
(isalpha (c) || isdigit (c) || c == '$' ||
c == '_' || c == '#' || c == '-' || c == '.');
}
/*
 * Registers every entry of TexKeywordTable with the keyword table, under
 * the language ID stored by initialize ().
 */
static void buildTexKeywordHash (void)
{
	const keywordDesc *entry = TexKeywordTable;
	const keywordDesc *const end =
		TexKeywordTable + sizeof (TexKeywordTable) / sizeof (TexKeywordTable [0]);

	for ( ; entry != end ; ++entry)
		addKeyword (entry->name, Lang_js, (int) entry->id);
}
/*
 * Allocates a token with empty "string" and "scope" buffers, no type and
 * no keyword, stamped with the current source line and file position.
 * The caller releases it with deleteToken ().
 */
static tokenInfo *newToken (void)
{
	tokenInfo *const fresh = xMalloc (1, tokenInfo);

	fresh->type    = TOKEN_UNDEFINED;
	fresh->keyword = KEYWORD_NONE;

	/* Text buffers owned by the token. */
	fresh->string = vStringNew ();
	fresh->scope  = vStringNew ();

	/* Remember where in the input the token was created. */
	fresh->lineNumber   = getSourceLineNumber ();
	fresh->filePosition = getInputFilePosition ();

	return fresh;
}
/*
 * Releases a token obtained from newToken (): first the two text buffers
 * it owns, then the token structure itself.
 */
static void deleteToken (tokenInfo *const token)
{
	vString *const text  = token->string;
	vString *const scope = token->scope;

	vStringDelete (text);
	vStringDelete (scope);
	eFree (token);
}
/*
* Tag generation functions
*/
/*
 * Emits a tag entry named after the token's text, located at the token's
 * recorded line and file position, with the kind name and kind letter of
 * TexKinds [kind]. Nothing is emitted when that kind is disabled.
 */
static void makeConstTag (tokenInfo *const token, const texKind kind)
{
	const kindOption *const option = &TexKinds [kind];
	tagEntryInfo entry;

	if (! option->enabled)
		return;

	initTagEntry (&entry, vStringValue (token->string));
	entry.lineNumber   = token->lineNumber;
	entry.filePosition = token->filePosition;
	entry.kindName     = option->name;
	entry.kind         = option->letter;
	makeTagEntry (&entry);
}
/*
 * Emits a tag for "token" when the given kind is enabled.
 *
 * If the token carries a non-empty scope, the token's text is first
 * rewritten in place to "<scope>.<text>", so the emitted tag name (and the
 * token itself afterwards) includes the scope.
 */
static void makeTexTag (tokenInfo *const token, texKind kind)
{
	if (! TexKinds [kind].enabled)
		return;

	if (vStringLength (token->scope) > 0)
	{
		/* Build the qualified name in a scratch buffer, then copy it back. */
		vString *const qualified = vStringNew ();

		vStringCopy (qualified, token->scope);
		vStringCatS (qualified, ".");
		vStringCatS (qualified, vStringValue (token->string));
		vStringTerminate (qualified);
		vStringCopy (token->string, qualified);
		vStringDelete (qualified);
	}

	makeConstTag (token, kind);
}
/*
* Parsing functions
*/
/*
 * Reads the body of a quoted string into "string".
 *
 * Characters are appended until "delimiter" or end of input is reached;
 * the closing delimiter is consumed but not stored. A backslash escapes
 * the character that follows it: the backslash is dropped and the next
 * character is stored verbatim (so an escaped delimiter does not end the
 * string).
 *
 * If the input ends right after a backslash, reading stops there; the EOF
 * marker is never appended to the string.
 */
static void parseString (vString *const string, const int delimiter)
{
	boolean end = FALSE;

	while (! end)
	{
		int c = fileGetc ();

		if (c == '\\')
			c = fileGetc ();  /* escaped character: this may be a ' or " */
		else if (c == delimiter)
		{
			end = TRUE;
			continue;
		}

		/* Covers both a plain EOF and an EOF following a backslash. */
		if (c == EOF)
			end = TRUE;
		else
			vStringPut (string, c);
	}
	vStringTerminate (string);
}
/*
 *   Reads a word beginning with "firstChar" and appends it to "string".
 *   The word extends over every following character accepted by
 *   isIdentChar (). The character that ends the word is silently dropped
 *   when it is whitespace and pushed back onto the input otherwise.
 */
static void parseIdentifier (vString *const string, const int firstChar)
{
	int next;

	Assert (isIdentChar (firstChar));

	/* The first character is stored unconditionally, then the rest. */
	vStringPut (string, firstChar);
	for (next = fileGetc () ; isIdentChar (next) ; next = fileGetc ())
		vStringPut (string, next);
	vStringTerminate (string);

	if (isspace (next))
		return;  /* the terminating whitespace is swallowed */
	fileUngetc (next);  /* unget non-identifier character */
}
/*
* Reads the next token from the input into "token".
*
* The token's type, keyword and text are reset first; its scope is left
* untouched. Blanks, tabs, newlines and '%' comments are skipped. At end
* of input the function does not return: it longjmp()s to the Exception
* buffer with ExceptionEOF.
*
* The recorded line number / file position are refreshed after every
* character read while skipping, and again after a string or word has
* been consumed.
*/
static void readToken (tokenInfo *const token)
{
int c;
token->type = TOKEN_UNDEFINED;
token->keyword = KEYWORD_NONE;
vStringClear (token->string);
/* Re-entered after a '%' comment has been skipped. */
getNextChar:
do
{
c = fileGetc ();
token->lineNumber = getSourceLineNumber ();
token->filePosition = getInputFilePosition ();
}
while (c == '\t' || c == ' ' || c == '\n');
switch (c)
{
/* End of input: unwind to the setjmp () that armed Exception. */
case EOF: longjmp (Exception, (int)ExceptionEOF); break;
/* Single-character punctuation tokens; their text stays empty. */
case '(': token->type = TOKEN_OPEN_PAREN; break;
case ')': token->type = TOKEN_CLOSE_PAREN; break;
case ',': token->type = TOKEN_COMMA; break;
case '{': token->type = TOKEN_OPEN_CURLY; break;
case '}': token->type = TOKEN_CLOSE_CURLY; break;
case '[': token->type = TOKEN_OPEN_SQUARE; break;
case ']': token->type = TOKEN_CLOSE_SQUARE; break;
case '*': token->type = TOKEN_STAR; break;
/*
* Quoted text: everything up to the matching quote (or EOF) becomes the
* token text. NOTE(review): a lone apostrophe or double quote in ordinary
* text therefore consumes input up to the next such character or EOF.
*/
case '\'':
case '"':
token->type = TOKEN_STRING;
parseString (token->string, c);
token->lineNumber = getSourceLineNumber ();
token->filePosition = getInputFilePosition ();
break;
case '\\':
/*
* All Tex tags start with a backslash.
* Check if the next character is an alpha character
* else it is not a potential tex tag.
*/
c = fileGetc ();
/* Not a word: push the character back; the token stays TOKEN_UNDEFINED. */
if (! isalpha (c))
fileUngetc (c);
else
{
parseIdentifier (token->string, c);
token->lineNumber = getSourceLineNumber ();
token->filePosition = getInputFilePosition ();
/* Words found in the keyword table become keywords, the rest identifiers. */
token->keyword = analyzeToken (token->string, Lang_js);
if (isKeyword (token, KEYWORD_NONE))
token->type = TOKEN_IDENTIFIER;
else
token->type = TOKEN_KEYWORD;
}
break;
case '%':
fileSkipToCharacter ('\n'); /* % are single line comments */
goto getNextChar;
break;
/* Anything else is a plain word if it starts with an identifier character. */
default:
if (! isIdentChar (c))
token->type = TOKEN_UNDEFINED;
else
{
parseIdentifier (token->string, c);
token->lineNumber = getSourceLineNumber ();
token->filePosition = getInputFilePosition ();
token->type = TOKEN_IDENTIFIER;
}
break;
}
}
/*
 * Copies every field of "src" into "dest". The text and scope are copied
 * into dest's own buffers, so the two tokens share no storage afterwards.
 */
static void copyToken (tokenInfo *const dest, tokenInfo *const src)
{
	/* Text buffers. */
	vStringCopy (dest->string, src->string);
	vStringCopy (dest->scope, src->scope);

	/* Classification. */
	dest->type    = src->type;
	dest->keyword = src->keyword;

	/* Source location. */
	dest->lineNumber   = src->lineNumber;
	dest->filePosition = src->filePosition;
}
/*
* Scanning functions
*/
/*
 * Consumes one command introduced by a keyword token and emits a tag of
 * the given kind for it.
 *
 * Tex tags are of these formats:
 *   \keyword{any number of words}
 *   \keyword[short desc]{any number of words}
 *   \keyword*[short desc]{any number of words}
 *
 * A '*' is accepted both before and after the optional [short desc]
 * argument. (Earlier, only a '*' following the optional argument was
 * recognised, so the documented "\keyword*[short desc]{...}" form produced
 * no tag at all.)
 *
 * Tag name:
 *   - with [short desc]: the identifiers inside the brackets, joined by
 *     single spaces; the braced title that follows is read but ignored;
 *   - otherwise: the text of every token inside the braces, joined by
 *     single spaces.
 * The tag's line and position are those of the keyword token.
 *
 * "token" is advanced as input is consumed. Always returns TRUE.
 *
 * NOTE: readToken () leaves via longjmp () at end of input; if that
 * happens inside this function, "name" and "fullname" are not released.
 */
static boolean parseTag (tokenInfo *const token, texKind kind)
{
	tokenInfo *const name = newToken ();
	vString *const fullname = vStringNew ();
	boolean useLongName = TRUE;

	vStringClear (fullname);

	if (isType (token, TOKEN_KEYWORD))
	{
		/* Keep the keyword's position for the tag, then move past it. */
		copyToken (name, token);
		readToken (token);
	}

	/* Star directly after the keyword: \keyword*... */
	if (isType (token, TOKEN_STAR))
		readToken (token);

	if (isType (token, TOKEN_OPEN_SQUARE))
	{
		/* The short description takes precedence over the braced title. */
		useLongName = FALSE;

		readToken (token);
		while (! isType (token, TOKEN_CLOSE_SQUARE))
		{
			if (isType (token, TOKEN_IDENTIFIER))
			{
				if (vStringLength (fullname) > 0)
					vStringCatS (fullname, " ");
				vStringCatS (fullname, vStringValue (token->string));
			}
			readToken (token);
		}
		vStringTerminate (fullname);
		vStringCopy (name->string, fullname);
		makeTexTag (name, kind);
	}

	/* Star after the optional argument: the position accepted previously. */
	if (isType (token, TOKEN_STAR))
		readToken (token);

	if (isType (token, TOKEN_OPEN_CURLY))
	{
		readToken (token);
		while (! isType (token, TOKEN_CLOSE_CURLY))
		{
			/* Every token contributes its text, not only identifiers. */
			if (useLongName)
			{
				if (vStringLength (fullname) > 0)
					vStringCatS (fullname, " ");
				vStringCatS (fullname, vStringValue (token->string));
			}
			readToken (token);
		}
		if (useLongName)
		{
			vStringTerminate (fullname);
			vStringCopy (name->string, fullname);
			makeTexTag (name, kind);
		}
	}

	deleteToken (name);
	vStringDelete (fullname);
	return TRUE;
}
/*
 * Main scanning loop: reads tokens one after another and, for each
 * keyword token, hands over to parseTag () with the matching tag kind.
 *
 * The loop has no exit of its own; control leaves this function only
 * through the longjmp () that readToken () performs at end of input.
 */
static void parseTexFile (tokenInfo *const token)
{
	for (;;)
	{
		texKind kind;

		readToken (token);
		if (! isType (token, TOKEN_KEYWORD))
			continue;

		/* Translate the keyword ID into the tag kind it produces. */
		switch (token->keyword)
		{
			case KEYWORD_chapter:       kind = TEXTAG_CHAPTER;       break;
			case KEYWORD_section:       kind = TEXTAG_SECTION;       break;
			case KEYWORD_subsection:    kind = TEXTAG_SUBSECTION;    break;
			case KEYWORD_subsubsection: kind = TEXTAG_SUBSUBSECTION; break;
			case KEYWORD_part:          kind = TEXTAG_PART;          break;
			case KEYWORD_paragraph:     kind = TEXTAG_PARAGRAPH;     break;
			case KEYWORD_subparagraph:  kind = TEXTAG_SUBPARAGRAPH;  break;
			case KEYWORD_include:       kind = TEXTAG_INCLUDE;       break;
			default:
				continue;  /* keyword without a tag kind: read on */
		}

		parseTag (token, kind);
	}
}
/*
 * Parser initialisation hook: remembers the language ID assigned to this
 * parser and registers the keywords under it. Also asserts that TexKinds
 * has exactly one entry per texKind value.
 */
static void initialize (const langType language)
{
	Assert (TEXTAG_COUNT == sizeof (TexKinds) / sizeof (TexKinds [0]));

	Lang_js = language;
	buildTexKeywordHash ();
}
static void findTexTags (void)
{
tokenInfo *const token = newToken ();
exception_t exception;
exception = (exception_t) (setjmp (Exception));
while (exception == ExceptionNone)
parseTexFile (token);
deleteToken (token);
}
/*
 * Creates the parser definition structure for the "Tex" language:
 * files with the "tex" extension, the kinds listed in TexKinds, and the
 * hand-written scanner (findTexTags / initialize) instead of regexes.
 */
extern parserDefinition* TexParser (void)
{
	static const char *const extensions [] = { "tex", NULL };
	parserDefinition *const def = parserNew ("Tex");

	/* Kinds of tags this parser can emit. */
	def->kinds      = TexKinds;
	def->kindCount  = KIND_COUNT (TexKinds);

	/* Files handled and the callbacks that process them. */
	def->extensions = extensions;
	def->initialize = initialize;
	def->parser     = findTexTags;

	return def;
}
/* vi:set tabstop=4 shiftwidth=4 noexpandtab: */