diff --git a/Makefile b/Makefile index 0e492dc50..48248cdcb 100644 --- a/Makefile +++ b/Makefile @@ -176,6 +176,7 @@ include third_party/tr/tr.mk include third_party/sed/sed.mk include third_party/awk/awk.mk include third_party/make/make.mk +include third_party/ctags/ctags.mk include third_party/finger/finger.mk include third_party/argon2/argon2.mk include third_party/smallz4/smallz4.mk @@ -278,14 +279,14 @@ o/$(MODE)/hdrs-old.txt: o/$(MODE)/.x $(MAKEFILES) $(call uniq,$(foreach x,$(HDRS $(file >$@) $(foreach x,$(HDRS) $(INCS),$(file >>$@,$(x))) TAGS: private .UNSANDBOXED = 1 -TAGS: o/$(MODE)/srcs-old.txt $(SRCS) +TAGS: o/$(MODE)/srcs-old.txt $(SRCS) o/$(MODE)/third_party/ctags/ctags.com @$(RM) $@ - @$(TAGS) $(TAGSFLAGS) -L $< -o $@ + @o/$(MODE)/third_party/ctags/ctags.com $(TAGSFLAGS) -L $< -o $@ HTAGS: private .UNSANDBOXED = 1 -HTAGS: o/$(MODE)/hdrs-old.txt $(HDRS) +HTAGS: o/$(MODE)/hdrs-old.txt $(HDRS) o/$(MODE)/third_party/ctags/ctags.com @$(RM) $@ - @build/htags -L $< -o $@ + @build/htags o/$(MODE)/third_party/ctags/ctags.com -L $< -o $@ loc: private .UNSANDBOXED = 1 loc: o/$(MODE)/tool/build/summy.com @@ -438,9 +439,9 @@ $(SRCS): $(HDRS): $(INCS): .DEFAULT: - @$(ECHO) >&2 - @$(ECHO) NOTE: deleting o/$(MODE)/depend because of an unspecified prerequisite: $@ >&2 - @$(ECHO) >&2 + @$(ECHO) + @$(ECHO) NOTE: deleting o/$(MODE)/depend because of an unspecified prerequisite: $@ + @$(ECHO) $(RM) o/$(MODE)/depend -include o/$(MODE)/depend diff --git a/build/definitions.mk b/build/definitions.mk index 216c80b4b..172473c36 100644 --- a/build/definitions.mk +++ b/build/definitions.mk @@ -45,7 +45,6 @@ LC_ALL = C SOURCE_DATE_EPOCH = 0 -TAGS ?= /usr/bin/ctags # emacs source builds or something breaks it ARFLAGS = rcsD ZFLAGS ?= XARGS ?= xargs -P4 -rs8000 diff --git a/build/htags b/build/htags index f1ffc2dd0..a580edf29 100755 --- a/build/htags +++ b/build/htags @@ -43,6 +43,9 @@ # '(progn # (add-hook 'c-mode-common-hook 'jart-c-mode-common-hook))) +TAGS="$1" +shift + # 
ctags doesn't understand atomics, e.g. # extern char **environ; set -- --regex-c='/_Atomic(\([^)]*\))/\1/b' "$@" @@ -63,7 +66,7 @@ set -- --regex-c='/^extern [^(]*(\*const \([^)]*\))(/\1/b' "$@" # struct WorstSoftwareEver; set -- --regex-c='/^struct.*;$/uehocruehcroue/b' "$@" -exec ${TAGS:-ctags} \ +exec $TAGS \ -e \ --langmap=c:.c.h \ --exclude=libc/nt/struct/imagefileheader.internal.h \ diff --git a/third_party/ctags/COPYING b/third_party/ctags/COPYING new file mode 100644 index 000000000..b201bfb03 --- /dev/null +++ b/third_party/ctags/COPYING @@ -0,0 +1,341 @@ +// clang-format off + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. 
+ + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. 
The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. 
+ + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. 
+ +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. 
However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. 
+You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) 19yy <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) 19yy name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. 
diff --git a/third_party/ctags/README.cosmo b/third_party/ctags/README.cosmo new file mode 100644 index 000000000..a5e07ea21 --- /dev/null +++ b/third_party/ctags/README.cosmo @@ -0,0 +1,16 @@ +// clang-format off +ORIGIN + + Debian 11 + Package: exuberant-ctags + Version: 1:5.9~svn20110310-14 + +LICENSE + + GNU GPL v2 + +LOCAL CHANGES + + - Rename __unused__ to __unused + - Use [[:alnum:]] rather than invalid strings + - Remove support for VAX, VMS, OS2, QDOS, Amiga, etc. diff --git a/third_party/ctags/ant.c b/third_party/ctags/ant.c new file mode 100644 index 000000000..e350a80b3 --- /dev/null +++ b/third_party/ctags/ant.c @@ -0,0 +1,44 @@ +// clang-format off +/* +* $Id$ +* +* Copyright (c) 2008, David Fishburn +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains functions for generating tags for Ant language files. +*/ + +/* +* INCLUDE FILES +*/ +#include "third_party/ctags/general.h" /* must always come first */ + +#include "libc/mem/alg.h" +#include "libc/str/str.h" +#include "third_party/ctags/parse.h" + +/* +* FUNCTION DEFINITIONS +*/ +/* Registers two tag regexes for the language: the name attribute of a <project> element (kind p) and of a <target> element (kind t). */ +static void installAntRegex (const langType language) +{ + addTagRegex (language, + "^[ \t]*<[ \t]*project[^>]+name=\"([^\"]+)\".*", "\\1", "p,project,projects", NULL); + addTagRegex (language, + "^[ \t]*<[ \t]*target[^>]+name=\"([^\"]+)\".*", "\\1", "t,target,targets", NULL); +} +/* Builds the "Ant" parser definition: extension list { "build.xml" }, regex-based, initialized by installAntRegex. */ +extern parserDefinition* AntParser () +{ + static const char *const extensions [] = { "build.xml", NULL }; + parserDefinition* const def = parserNew ("Ant"); + def->extensions = extensions; + def->initialize = installAntRegex; + def->regex = TRUE; + return def; +} + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/third_party/ctags/args.c b/third_party/ctags/args.c new file mode 100644 index 000000000..45b5ebeee --- /dev/null +++ b/third_party/ctags/args.c @@ -0,0 +1,268 @@ +/* + * $Id: args.c 536 2007-06-02 06:09:00Z elliotth $ + * + * Copyright (c) 
1999-2002, Darren Hiebert + * + * This source code is released for free distribution under the terms of the + * GNU General Public License. + * + * This module contains functions for reading command line arguments. + */ +#include "third_party/ctags/general.h" /* must always come first */ +/**/ +#include "libc/str/str.h" +#include "third_party/ctags/args.h" +#include "third_party/ctags/debug.h" +#include "third_party/ctags/routines.h" +// clang-format off + +/* +* FUNCTION DEFINITIONS +*/ +/* Skips leading whitespace at *next and returns a heap copy of the next whitespace-delimited word, advancing *next past it; returns NULL (with *next at the terminator) when only whitespace remains. */ +static char *nextStringArg (const char** const next) +{ + char* result = NULL; + const char* start; + + Assert (*next != NULL); + for (start = *next ; isspace ((unsigned char) *start) ; ++start) + ; + if (*start == '\0') + *next = start; + else + { + size_t length; + const char* end; + + for (end = start ; *end != '\0' && ! isspace ((unsigned char) *end) ; ++end) + ; + length = end - start; + Assert (length > 0); + result = xMalloc (length + 1, char); + strncpy (result, start, length); + result [length] = '\0'; + *next = end; + } + return result; +} +/* Returns a heap copy of the text up to the next line terminator (NULL for an empty line) and advances *next past one LF, CR or CR LF. The scan stops on CR as well as LF so the CR handling below is reachable. */ +static char* nextStringLine (const char** const next) +{ + char* result = NULL; + size_t length; + const char* end; + + Assert (*next != NULL); + for (end = *next ; *end != '\n' && *end != '\r' && *end != '\0' ; ++end) + ; + length = end - *next; + if (length > 0) + { + result = xMalloc (length + 1, char); + strncpy (result, *next, length); + result [length] = '\0'; + } + if (*end == '\n') + ++end; + else if (*end == '\r') + { + ++end; + if (*end == '\n') + ++end; + } + *next = end; + return result; +} +/* Dispatches on current->lineMode: whole-line splitting when set, whitespace-delimited words otherwise. */ +static char* nextString (const Arguments* const current, const char** const next) +{ + char* result; + if (current->lineMode) + result = nextStringLine (next); + else + result = nextStringArg (next); + return result; +} +/* Reads the next whitespace-delimited word from fp into a heap string; returns NULL at end of input. */ +static char* nextFileArg (FILE* const fp) +{ + char* result = NULL; + Assert (fp != NULL); + if (! 
feof (fp)) + { + vString* vs = vStringNew (); + int c; + do + c = fgetc (fp); + while (isspace (c)); + + if (c != EOF) + { + do + { + vStringPut (vs, c); + c = fgetc (fp); + } while (c != EOF && ! isspace (c)); + vStringTerminate (vs); + Assert (vStringLength (vs) > 0); + result = xMalloc (vStringLength (vs) + 1, char); + strcpy (result, vStringValue (vs)); + } + vStringDelete (vs); + } + return result; +} +/* Reads the next non-empty line from fp (LF, CR or CR LF terminated), passes it through vStringStripTrailing, and returns a heap copy; NULL at end of input. fp is asserted before its first use. */ +static char* nextFileLine (FILE* const fp) +{ + char* result = NULL; + Assert (fp != NULL); + if (! feof (fp)) + { + vString* vs = vStringNew (); + int c; + + c = fgetc (fp); + while (c != EOF) + { + if (c != '\n' && c != '\r') + vStringPut (vs, c); + else if (vStringLength (vs) > 0) + break; + c = fgetc (fp); + } + if (c != EOF || vStringLength (vs) > 0) + { + if (c == '\r') + { + c = fgetc (fp); + if (c != '\n') + c = ungetc (c, fp); + } + vStringTerminate (vs); + vStringStripTrailing (vs); + result = xMalloc (vStringLength (vs) + 1, char); + strcpy (result, vStringValue (vs)); + } + vStringDelete (vs); + } + return result; +} +/* Dispatches on current->lineMode between whole-line and whitespace-delimited reads from fp. */ +static char* nextFileString (const Arguments* const current, FILE* const fp) +{ + char* result; + if (current->lineMode) + result = nextFileLine (fp); + else + result = nextFileArg (fp); + return result; +} +/* Creates a cursor over string (the pointer is stored, not copied) and eagerly fetches the first item. */ +extern Arguments* argNewFromString (const char* const string) +{ + Arguments* result = xMalloc (1, Arguments); + memset (result, 0, sizeof (Arguments)); + result->type = ARG_STRING; + result->u.stringArgs.string = string; + result->u.stringArgs.item = string; + result->u.stringArgs.next = string; + result->item = nextString (result, &result->u.stringArgs.next); + return result; +} +/* Creates a cursor over argv (stored, not copied); argv must end with a NULL entry because argOff tests item == NULL. */ +extern Arguments* argNewFromArgv (char* const* const argv) +{ + Arguments* result = xMalloc (1, Arguments); + memset (result, 0, sizeof (Arguments)); + result->type = ARG_ARGV; + result->u.argvArgs.argv = argv; + result->u.argvArgs.item = result->u.argvArgs.argv; + result->item = *result->u.argvArgs.item; + return result; +} + +extern Arguments* 
argNewFromFile (FILE* const fp) /* cursor over fp with lineMode left zeroed; first item is read eagerly */ +{ + Arguments* result = xMalloc (1, Arguments); + memset (result, 0, sizeof (Arguments)); + result->type = ARG_FILE; + result->u.fileArgs.fp = fp; + result->item = nextFileString (result, result->u.fileArgs.fp); + return result; +} +/* Same as argNewFromFile, but lineMode is set before the first item is read, so items are whole lines. */ +extern Arguments* argNewFromLineFile (FILE* const fp) +{ + Arguments* result = xMalloc (1, Arguments); + memset (result, 0, sizeof (Arguments)); + result->type = ARG_FILE; + result->lineMode = TRUE; + result->u.fileArgs.fp = fp; + result->item = nextFileString (result, result->u.fileArgs.fp); + return result; +} +/* Returns the current item; asserts that the cursor is not exhausted. */ +extern char *argItem (const Arguments* const current) +{ + Assert (current != NULL); + Assert (! argOff (current)); + return current->item; +} +/* TRUE once the cursor has no current item (item == NULL). */ +extern boolean argOff (const Arguments* const current) +{ + Assert (current != NULL); + return (boolean) (current->item == NULL); +} +/* Clears lineMode: later items from string/file sources are whitespace-delimited words. */ +extern void argSetWordMode (Arguments* const current) +{ + Assert (current != NULL); + current->lineMode = FALSE; +} +/* Sets lineMode: later items from string/file sources are whole lines. */ +extern void argSetLineMode (Arguments* const current) +{ + Assert (current != NULL); + current->lineMode = TRUE; +} +/* Advances to the next item; string and file sources free the previous heap-allocated item first. */ +extern void argForth (Arguments* const current) +{ + Assert (current != NULL); + Assert (! 
argOff (current)); + switch (current->type) + { + case ARG_STRING: + if (current->item != NULL) + eFree (current->item); + current->u.stringArgs.item = current->u.stringArgs.next; + current->item = nextString (current, &current->u.stringArgs.next); + break; + case ARG_ARGV: + ++current->u.argvArgs.item; + current->item = *current->u.argvArgs.item; + break; + case ARG_FILE: + if (current->item != NULL) + eFree (current->item); + current->item = nextFileString (current, current->u.fileArgs.fp); + break; + default: + Assert ("Invalid argument type" == NULL); + break; + } +} +/* Frees the cursor and any heap-allocated current item: string and file sources own their item, argv items are borrowed from the caller. */ +extern void argDelete (Arguments* const current) +{ + Assert (current != NULL); + if ((current->type == ARG_STRING || current->type == ARG_FILE) && current->item != NULL) + eFree (current->item); + memset (current, 0, sizeof (Arguments)); + eFree (current); +} + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/third_party/ctags/args.h b/third_party/ctags/args.h new file mode 100644 index 000000000..e52495e73 --- /dev/null +++ b/third_party/ctags/args.h @@ -0,0 +1,71 @@ +// clang-format off +/* +* $Id: args.h 443 2006-05-30 04:37:13Z darren $ +* +* Copyright (c) 1999-2002, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* Defines external interface to command line argument reading. 
+*/ +#ifndef _ARGS_H +#define _ARGS_H + +/* +* INCLUDE FILES +*/ +#include "third_party/ctags/general.h" /* must always come first */ + +#include "libc/calls/calls.h" +#include "libc/calls/dprintf.h" +#include "libc/calls/weirdtypes.h" +#include "libc/fmt/fmt.h" +#include "libc/mem/fmt.h" +#include "libc/stdio/stdio.h" +#include "libc/stdio/temp.h" +#include "third_party/musl/tempnam.h" + +/* +* DATA DECLARATIONS +*/ +/* Identifies the argument source, i.e. which member of Arguments.u is in use. */ +typedef enum { ARG_NONE, ARG_STRING, ARG_ARGV, ARG_FILE } argType; +/* Cursor over an argument source. item is the current argument (NULL once exhausted, see argOff); lineMode selects whole-line rather than whitespace-delimited items for string and file sources. */ +typedef struct sArgs { + argType type; + union { + struct sStringArgs { + const char* string; + const char* next; + const char* item; + } stringArgs; + struct sArgvArgs { + char* const* argv; + char* const* item; + } argvArgs; + struct sFileArgs { + FILE* fp; + } fileArgs; + } u; + char* item; + boolean lineMode; +} Arguments; + +/* +* FUNCTION PROTOTYPES +*/ +extern Arguments* argNewFromString (const char* const string); +extern Arguments* argNewFromArgv (char* const* const argv); +extern Arguments* argNewFromFile (FILE* const fp); +extern Arguments* argNewFromLineFile (FILE* const fp); +extern char *argItem (const Arguments* const current); +extern boolean argOff (const Arguments* const current); +extern void argSetWordMode (Arguments* const current); +extern void argSetLineMode (Arguments* const current); +extern void argForth (Arguments* const current); +extern void argDelete (Arguments* const current); + +#endif /* _ARGS_H */ + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/third_party/ctags/asm.c b/third_party/ctags/asm.c new file mode 100644 index 000000000..8b3c31492 --- /dev/null +++ b/third_party/ctags/asm.c @@ -0,0 +1,389 @@ +// clang-format off +/* +* $Id: asm.c 536 2007-06-02 06:09:00Z elliotth $ +* +* Copyright (c) 2000-2003, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains functions for generating tags for assembly language +* files. 
+*/ + +/* +* INCLUDE FILES +*/ +#include "third_party/ctags/general.h" /* must always come first */ + +#include "libc/mem/alg.h" +#include "libc/str/str.h" + +#include "third_party/ctags/debug.h" +#include "third_party/ctags/keyword.h" +#include "third_party/ctags/parse.h" +#include "third_party/ctags/read.h" +#include "third_party/ctags/routines.h" +#include "third_party/ctags/vstring.h" + +/* +* DATA DECLARATIONS +*/ +typedef enum { + K_NONE = -1, K_DEFINE, K_LABEL, K_MACRO, K_TYPE +} AsmKind; +/* Operators this parser recognizes; OP_UNDEFINED (-1) marks an operator that is not a known keyword. */ +typedef enum { + OP_UNDEFINED = -1, + OP_ALIGN, + OP_COLON_EQUAL, + OP_END, + OP_ENDM, + OP_ENDMACRO, + OP_ENDP, + OP_ENDS, + OP_EQU, + OP_EQUAL, + OP_LABEL, + OP_MACRO, + OP_PROC, + OP_RECORD, + OP_SECTIONS, + OP_SET, + OP_STRUCT, + OP_LAST +} opKeyword; +/* Pairs an operator spelling with its opKeyword value. */ +typedef struct { + const char *operator; + opKeyword keyword; +} asmKeyword; +/* Pairs an opKeyword with the tag kind it produces (K_NONE when it produces no tag). */ +typedef struct { + opKeyword keyword; + AsmKind kind; +} opKind; + +/* +* DATA DEFINITIONS +*/ +static langType Lang_asm; +/* Tag kind descriptions, listed in AsmKind order (define, label, macro, type). */ +static kindOption AsmKinds [] = { + { TRUE, 'd', "define", "defines" }, + { TRUE, 'l', "label", "labels" }, + { TRUE, 'm', "macro", "macros" }, + { TRUE, 't', "type", "types (structs and records)" } +}; +/* Lower-case operator spellings and the opKeyword each one maps to. */ +static const asmKeyword AsmKeywords [] = { + { "align", OP_ALIGN }, + { "endmacro", OP_ENDMACRO }, + { "endm", OP_ENDM }, + { "end", OP_END }, + { "endp", OP_ENDP }, + { "ends", OP_ENDS }, + { "equ", OP_EQU }, + { "label", OP_LABEL }, + { "macro", OP_MACRO }, + { ":=", OP_COLON_EQUAL }, + { "=", OP_EQUAL }, + { "proc", OP_PROC }, + { "record", OP_RECORD }, + { "sections", OP_SECTIONS }, + { "set", OP_SET }, + { "struct", OP_STRUCT } +}; +/* Lookup table from opKeyword to tag kind; it is indexed directly by the enum value, hence the ordering requirement below. */ +static const opKind OpKinds [] = { + /* must be ordered same as opKeyword enumeration */ + { OP_ALIGN, K_NONE }, + { OP_COLON_EQUAL, K_DEFINE }, + { OP_END, K_NONE }, + { OP_ENDM, K_NONE }, + { OP_ENDMACRO, K_NONE }, + { OP_ENDP, K_NONE }, + { OP_ENDS, K_NONE }, + { OP_EQU, K_DEFINE }, + { OP_EQUAL, K_DEFINE }, + { OP_LABEL, K_LABEL }, + { OP_MACRO, K_MACRO }, + { OP_PROC, 
K_LABEL }, + { OP_RECORD, K_TYPE }, + { OP_SECTIONS, K_NONE }, + { OP_SET, K_DEFINE }, + { OP_STRUCT, K_TYPE } +}; + +/* +* FUNCTION DEFINITIONS +*/ +static void buildAsmKeywordHash (void) +{ + const size_t count = sizeof (AsmKeywords) / sizeof (AsmKeywords [0]); + size_t i; + for (i = 0 ; i < count ; ++i) + { + const asmKeyword* const p = AsmKeywords + i; + addKeyword (p->operator, Lang_asm, (int) p->keyword); + } +} + +static opKeyword analyzeOperator (const vString *const op) +{ + vString *keyword = vStringNew (); + opKeyword result; + + vStringCopyToLower (keyword, op); + result = (opKeyword) lookupKeyword (vStringValue (keyword), Lang_asm); + vStringDelete (keyword); + return result; +} + +static boolean isInitialSymbolCharacter (int c) +{ + return (boolean) (c != '\0' && (isalpha (c) || strchr ("_$", c) != NULL)); +} + +static boolean isSymbolCharacter (int c) +{ + /* '?' character is allowed in AMD 29K family */ + return (boolean) (c != '\0' && (isalnum (c) || strchr ("_$?", c) != NULL)); +} + +static boolean readPreProc (const unsigned char *const line) +{ + boolean result; + const unsigned char *cp = line; + vString *name = vStringNew (); + while (isSymbolCharacter ((int) *cp)) + { + vStringPut (name, *cp); + ++cp; + } + vStringTerminate (name); + result = (boolean) (strcmp (vStringValue (name), "define") == 0); + if (result) + { + while (isspace ((int) *cp)) + ++cp; + vStringClear (name); + while (isSymbolCharacter ((int) *cp)) + { + vStringPut (name, *cp); + ++cp; + } + vStringTerminate (name); + makeSimpleTag (name, AsmKinds, K_DEFINE); + } + vStringDelete (name); + return result; +} + +static AsmKind operatorKind ( + const vString *const operator, + boolean *const found) +{ + AsmKind result = K_NONE; + const opKeyword kw = analyzeOperator (operator); + *found = (boolean) (kw != OP_UNDEFINED); + if (*found) + { + result = OpKinds [kw].kind; + Assert (OpKinds [kw].keyword == kw); + } + return result; +} + +/* We must check for "DB", "DB.L", "DCB.W" 
(68000) + */ +static boolean isDefineOperator (const vString *const operator) +{ + const unsigned char *const op = + (unsigned char*) vStringValue (operator); + const size_t length = vStringLength (operator); + const boolean result = (boolean) (length > 0 && + toupper ((int) *op) == 'D' && + (length == 2 || + (length == 4 && (int) op [2] == '.') || + (length == 5 && (int) op [3] == '.'))); + return result; +} + +static void makeAsmTag ( + const vString *const name, + const vString *const operator, + const boolean labelCandidate, + const boolean nameFollows) +{ + if (vStringLength (name) > 0) + { + boolean found; + const AsmKind kind = operatorKind (operator, &found); + if (found) + { + if (kind != K_NONE) + makeSimpleTag (name, AsmKinds, kind); + } + else if (isDefineOperator (operator)) + { + if (! nameFollows) + makeSimpleTag (name, AsmKinds, K_DEFINE); + } + else if (labelCandidate) + { + operatorKind (name, &found); + if (! found) + makeSimpleTag (name, AsmKinds, K_LABEL); + } + } +} + +static const unsigned char *readSymbol ( + const unsigned char *const start, + vString *const sym) +{ + const unsigned char *cp = start; + vStringClear (sym); + if (isInitialSymbolCharacter ((int) *cp)) + { + while (isSymbolCharacter ((int) *cp)) + { + vStringPut (sym, *cp); + ++cp; + } + vStringTerminate (sym); + } + return cp; +} + +static const unsigned char *readOperator ( + const unsigned char *const start, + vString *const operator) +{ + const unsigned char *cp = start; + vStringClear (operator); + while (*cp != '\0' && ! isspace ((int) *cp)) + { + vStringPut (operator, *cp); + ++cp; + } + vStringTerminate (operator); + return cp; +} + +static void findAsmTags (void) +{ + vString *name = vStringNew (); + vString *operator = vStringNew (); + const unsigned char *line; + boolean inCComment = FALSE; + + while ((line = fileReadLine ()) != NULL) + { + const unsigned char *cp = line; + boolean labelCandidate = (boolean) (! 
isspace ((int) *cp)); + boolean nameFollows = FALSE; + const boolean isComment = (boolean) + (*cp != '\0' && strchr (";*@", *cp) != NULL); + + /* skip comments */ + if (strncmp ((const char*) cp, "/*", (size_t) 2) == 0) + { + inCComment = TRUE; + cp += 2; + } + if (inCComment) + { + do + { + if (strncmp ((const char*) cp, "*/", (size_t) 2) == 0) + { + inCComment = FALSE; + cp += 2; + break; + } + ++cp; + } while (*cp != '\0'); + } + if (isComment || inCComment) + continue; + + /* read preprocessor defines */ + if (*cp == '#') + { + ++cp; + readPreProc (cp); + continue; + } + + /* skip white space */ + while (isspace ((int) *cp)) + ++cp; + + /* read symbol */ + cp = readSymbol (cp, name); + if (vStringLength (name) > 0 && *cp == ':') + { + labelCandidate = TRUE; + ++cp; + } + + if (! isspace ((int) *cp) && *cp != '\0') + continue; + + /* skip white space */ + while (isspace ((int) *cp)) + ++cp; + + /* skip leading dot */ +#if 0 + if (*cp == '.') + ++cp; +#endif + + cp = readOperator (cp, operator); + + /* attempt second read of symbol */ + if (vStringLength (name) == 0) + { + while (isspace ((int) *cp)) + ++cp; + cp = readSymbol (cp, name); + nameFollows = TRUE; + } + makeAsmTag (name, operator, labelCandidate, nameFollows); + } + vStringDelete (name); + vStringDelete (operator); +} + +static void initialize (const langType language) +{ + Lang_asm = language; + buildAsmKeywordHash (); +} + +extern parserDefinition* AsmParser (void) +{ + static const char *const extensions [] = { + "asm", "ASM", "s", "S", NULL + }; + static const char *const patterns [] = { + "*.A51", + "*.29[kK]", + "*.[68][68][kKsSxX]", + "*.[xX][68][68]", + NULL + }; + parserDefinition* def = parserNew ("Asm"); + def->kinds = AsmKinds; + def->kindCount = KIND_COUNT (AsmKinds); + def->extensions = extensions; + def->patterns = patterns; + def->parser = findAsmTags; + def->initialize = initialize; + return def; +} + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/third_party/ctags/asp.c 
b/third_party/ctags/asp.c new file mode 100644 index 000000000..cd1209f16 --- /dev/null +++ b/third_party/ctags/asp.c @@ -0,0 +1,330 @@ +// clang-format off +/* +* $Id: asp.c 711 2009-07-04 16:52:11Z dhiebert $ +* +* Copyright (c) 2000, Patrick Dehne +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains functions for generating tags for the ASP (Active +* Server Pages) web page scripting language. +*/ + +/* +* INCLUDE FILES +*/ +#include "third_party/ctags/general.h" /* must always come first */ + +#include "libc/mem/alg.h" +#include "libc/str/str.h" + +#include "third_party/ctags/parse.h" +#include "third_party/ctags/read.h" +#include "third_party/ctags/vstring.h" + +/* +* DATA DEFINITIONS +*/ +typedef enum { + K_CONST, K_CLASS, K_FUNCTION, K_SUB, K_DIM +} aspKind; + +static kindOption AspKinds [] = { + { TRUE, 'd', "constant", "constants"}, + { TRUE, 'c', "class", "classes"}, + { TRUE, 'f', "function", "functions"}, + { TRUE, 's', "subroutine", "subroutines"}, + { TRUE, 'v', "variable", "variables"} +}; + +/* +* FUNCTION DEFINITIONS +*/ + +static void findAspTags (void) +{ + vString *name = vStringNew (); + const unsigned char *line; + + while ((line = fileReadLine ()) != NULL) + { + const unsigned char *cp = line; + + while (*cp != '\0') + { + /* jump over whitespace */ + while (isspace ((int)*cp)) + cp++; + + /* jump over strings */ + if (*cp == '"') + { + cp++; + while (*cp!='"' && *cp!='\0') + cp++; + } + + /* jump over comments */ + else if (*cp == '\'') + break; + + /* jump over end function/sub lines */ + else if (strncasecmp ((const char*) cp, "end", (size_t) 3)== 0) + { + cp += 3; + if (isspace ((int)*cp)) + { + while (isspace ((int)*cp)) + ++cp; + + if (strncasecmp ((const char*) cp, "function", (size_t) 8) == 0) + { + cp+=8; + break; + } + + else if (strncasecmp ((const char*) cp, "sub", (size_t) 3) == 0) + { + cp+=3; + break; + } + } + } + + /* jump over exit 
function/sub lines */ + else if (strncasecmp ((const char*) cp, "exit", (size_t) 4)==0) + { + cp += 4; + if (isspace ((int) *cp)) + { + while (isspace ((int) *cp)) + ++cp; + + if (strncasecmp ((const char*) cp, "function", (size_t) 8) == 0) + { + cp+=8; + break; + } + + else if (strncasecmp ((const char*) cp, "sub", (size_t) 3) == 0) + { + cp+=3; + break; + } + } + } + + /* class member? */ + else if (strncasecmp ((const char*) cp, "public", (size_t) 6) == 0) + { + cp += 6; + if (isspace ((int) *cp)) + { + while (isspace ((int) *cp)) + ++cp; + if (strncasecmp ((const char*) cp, "function", (size_t) 8) == 0) + { + cp+=8; + while (isspace ((int) *cp)) + ++cp; + while (isalnum ((int) *cp) || *cp == '_') + { + vStringPut (name, (int) *cp); + ++cp; + } + vStringTerminate (name); + makeSimpleTag (name, AspKinds, K_FUNCTION); + vStringClear (name); + } + else if (strncasecmp ((const char*) cp, "sub", (size_t) 3) == 0) + { + cp+=3; + while (isspace ((int) *cp)) + ++cp; + while (isalnum ((int) *cp) || *cp == '_') + { + vStringPut (name, (int) *cp); + ++cp; + } + vStringTerminate (name); + makeSimpleTag (name, AspKinds, K_SUB); + vStringClear (name); + } + else { + while (isalnum ((int) *cp) || *cp == '_') + { + vStringPut (name, (int) *cp); + ++cp; + } + vStringTerminate (name); + makeSimpleTag (name, AspKinds, K_DIM); + vStringClear (name); + } + } + } + else if (strncasecmp ((const char*) cp, "private", (size_t) 7) == 0) + { + cp += 7; + if (isspace ((int) *cp)) + { + while (isspace ((int) *cp)) + ++cp; + if (strncasecmp ((const char*) cp, "function", (size_t) 8) == 0) + { + cp+=8; + while (isspace ((int) *cp)) + ++cp; + while (isalnum ((int) *cp) || *cp == '_') + { + vStringPut (name, (int) *cp); + ++cp; + } + vStringTerminate (name); + makeSimpleTag (name, AspKinds, K_FUNCTION); + vStringClear (name); + } + else if (strncasecmp ((const char*) cp, "sub", (size_t) 3) == 0) + { + cp+=3; + while (isspace ((int) *cp)) + ++cp; + while (isalnum ((int) *cp) || *cp == '_') + { + 
vStringPut (name, (int) *cp); + ++cp; + } + vStringTerminate (name); + makeSimpleTag (name, AspKinds, K_SUB); + vStringClear (name); + } + else { + while (isalnum ((int) *cp) || *cp == '_') + { + vStringPut (name, (int) *cp); + ++cp; + } + vStringTerminate (name); + makeSimpleTag (name, AspKinds, K_DIM); + vStringClear (name); + } + } + } + + /* function? */ + else if (strncasecmp ((const char*) cp, "function", (size_t) 8) == 0) + { + cp += 8; + + if (isspace ((int) *cp)) + { + while (isspace ((int) *cp)) + ++cp; + while (isalnum ((int) *cp) || *cp == '_') + { + vStringPut (name, (int) *cp); + ++cp; + } + vStringTerminate (name); + makeSimpleTag (name, AspKinds, K_FUNCTION); + vStringClear (name); + } + } + + /* sub? */ + else if (strncasecmp ((const char*) cp, "sub", (size_t) 3) == 0) + { + cp += 3; + if (isspace ((int) *cp)) + { + while (isspace ((int) *cp)) + ++cp; + while (isalnum ((int) *cp) || *cp == '_') + { + vStringPut (name, (int) *cp); + ++cp; + } + vStringTerminate (name); + makeSimpleTag (name, AspKinds, K_SUB); + vStringClear (name); + } + } + + /* dim variable? */ + else if (strncasecmp ((const char*) cp, "dim", (size_t) 3) == 0) + { + cp += 3; + if (isspace ((int) *cp)) + { + while (isspace ((int) *cp)) + ++cp; + while (isalnum ((int) *cp) || *cp == '_') + { + vStringPut (name, (int) *cp); + ++cp; + } + vStringTerminate (name); + makeSimpleTag (name, AspKinds, K_DIM); + vStringClear (name); + } + } + + /* class declaration? */ + else if (strncasecmp ((const char*) cp, "class", (size_t) 5) == 0) + { + cp += 5; + if (isspace ((int) *cp)) + { + while (isspace ((int) *cp)) + ++cp; + while (isalnum ((int) *cp) || *cp == '_') + { + vStringPut (name, (int) *cp); + ++cp; + } + vStringTerminate (name); + makeSimpleTag (name, AspKinds, K_CLASS); + vStringClear (name); + } + } + + /* const declaration? 
*/ + else if (strncasecmp ((const char*) cp, "const", (size_t) 5) == 0) + { + cp += 5; + if (isspace ((int) *cp)) + { + while (isspace ((int) *cp)) + ++cp; + while (isalnum ((int) *cp) || *cp == '_') + { + vStringPut (name, (int) *cp); + ++cp; + } + vStringTerminate (name); + makeSimpleTag (name, AspKinds, K_CONST); + vStringClear (name); + } + } + + /* nothing relevant */ + else if (*cp != '\0') + cp++; + } + } + vStringDelete (name); +} + +extern parserDefinition* AspParser (void) +{ + static const char *const extensions [] = { "asp", "asa", NULL }; + parserDefinition* def = parserNew ("Asp"); + def->kinds = AspKinds; + def->kindCount = KIND_COUNT (AspKinds); + def->extensions = extensions; + def->parser = findAspTags; + return def; +} + +/* vi:set tabstop=4 shiftwidth=4: */ + diff --git a/third_party/ctags/awk.c b/third_party/ctags/awk.c new file mode 100644 index 000000000..45b49afb3 --- /dev/null +++ b/third_party/ctags/awk.c @@ -0,0 +1,83 @@ +// clang-format off +/* +* $Id: awk.c 443 2006-05-30 04:37:13Z darren $ +* +* Copyright (c) 2000-2002, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains functions for generating tags for AWK functions. 
+*/ + +/* +* INCLUDE FILES +*/ +#include "third_party/ctags/general.h" /* must always come first */ + +#include "libc/mem/alg.h" +#include "libc/str/str.h" + +#include "third_party/ctags/parse.h" +#include "third_party/ctags/read.h" +#include "third_party/ctags/vstring.h" + +/* +* DATA DEFINITIONS +*/ +typedef enum eAwkKinds { + K_FUNCTION +} awkKind; + +static kindOption AwkKinds [] = { + { TRUE, 'f', "function", "functions" } +}; + +/* +* FUNCTION DEFINITIONS +*/ + +static void findAwkTags (void) +{ + vString *name = vStringNew (); + const unsigned char *line; + + while ((line = fileReadLine ()) != NULL) + { + if (strncmp ((const char*) line, "function", (size_t) 8) == 0 && + isspace ((int) line [8])) + { + const unsigned char *cp = line + 8; + + while (isspace ((int) *cp)) + ++cp; + while (isalnum ((int) *cp) || *cp == '_') + { + vStringPut (name, (int) *cp); + ++cp; + } + vStringTerminate (name); + while (isspace ((int) *cp)) + ++cp; + if (*cp == '(') + makeSimpleTag (name, AwkKinds, K_FUNCTION); + vStringClear (name); + if (*cp != '\0') + ++cp; + } + } + vStringDelete (name); +} + +extern parserDefinition* AwkParser () +{ + static const char *const extensions [] = { "awk", "gawk", "mawk", NULL }; + parserDefinition* def = parserNew ("Awk"); + def->kinds = AwkKinds; + def->kindCount = KIND_COUNT (AwkKinds); + def->extensions = extensions; + def->parser = findAwkTags; + return def; +} + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/third_party/ctags/basic.c b/third_party/ctags/basic.c new file mode 100644 index 000000000..c3b470579 --- /dev/null +++ b/third_party/ctags/basic.c @@ -0,0 +1,205 @@ +// clang-format off +/* + * $Id:$ + * + * Copyright (c) 2000-2006, Darren Hiebert, Elias Pschernig + * + * This source code is released for free distribution under the terms of the + * GNU General Public License. + * + * This module contains functions for generating tags for BlitzBasic + * (BlitzMax), PureBasic and FreeBasic language files. 
For now, this is kept + * quite simple - but feel free to ask for more things added any time - + * patches are of course most welcome. + */ + +/* + * INCLUDE FILES + */ +#include "third_party/ctags/general.h" /* must always come first */ + +#include "libc/mem/alg.h" +#include "libc/str/str.h" + +#include "third_party/ctags/options.h" +#include "third_party/ctags/parse.h" +#include "third_party/ctags/read.h" +#include "third_party/ctags/routines.h" +#include "third_party/ctags/vstring.h" + +/* + * DATA DEFINITIONS + */ +typedef enum { + K_CONST, + K_FUNCTION, + K_LABEL, + K_TYPE, + K_VARIABLE, + K_ENUM +} BasicKind; + +typedef struct { + char const *token; + BasicKind kind; + int skip; +} KeyWord; + +static kindOption BasicKinds[] = { + {TRUE, 'c', "constant", "constants"}, + {TRUE, 'f', "function", "functions"}, + {TRUE, 'l', "label", "labels"}, + {TRUE, 't', "type", "types"}, + {TRUE, 'v', "variable", "variables"}, + {TRUE, 'g', "enum", "enumerations"} +}; + +static KeyWord blitzbasic_keywords[] = { + {"const", K_CONST, 0}, + {"global", K_VARIABLE, 0}, + {"dim", K_VARIABLE, 0}, + {"function", K_FUNCTION, 0}, + {"type", K_TYPE, 0}, + {NULL, 0, 0} +}; + +static KeyWord purebasic_keywords[] = { + {"newlist", K_VARIABLE, 0}, + {"global", K_VARIABLE, 0}, + {"dim", K_VARIABLE, 0}, + {"procedure", K_FUNCTION, 0}, + {"interface", K_TYPE, 0}, + {"structure", K_TYPE, 0}, + {NULL, 0, 0} +}; + +static KeyWord freebasic_keywords[] = { + {"const", K_CONST, 0}, + {"dim as", K_VARIABLE, 1}, + {"dim", K_VARIABLE, 0}, + {"common", K_VARIABLE, 0}, + {"function", K_FUNCTION, 0}, + {"sub", K_FUNCTION, 0}, + {"private sub", K_FUNCTION, 0}, + {"public sub", K_FUNCTION, 0}, + {"private function", K_FUNCTION, 0}, + {"public function", K_FUNCTION, 0}, + {"type", K_TYPE, 0}, + {"enum", K_ENUM, 0}, + {NULL, 0, 0} +}; + +/* + * FUNCTION DEFINITIONS + */ + +/* Match the name of a tag (function, variable, type, ...) starting at pos. 
*/ +static char const *extract_name (char const *pos, vString * name) +{ + while (isspace (*pos)) + pos++; + vStringClear (name); + for (; *pos && !isspace (*pos) && *pos != '(' && *pos != ','; pos++) + vStringPut (name, *pos); + vStringTerminate (name); + return pos; +} + +/* Match a keyword starting at p (case insensitive). */ +static int match_keyword (const char *p, KeyWord const *kw) +{ + vString *name; + size_t i; + int j; + for (i = 0; i < strlen (kw->token); i++) + { + if (tolower (p[i]) != kw->token[i]) + return 0; + } + name = vStringNew (); + p += i; + for (j = 0; j < 1 + kw->skip; j++) + { + p = extract_name (p, name); + } + makeSimpleTag (name, BasicKinds, kw->kind); + vStringDelete (name); + return 1; +} + +/* Match a "label:" style label. */ +static void match_colon_label (char const *p) +{ + char const *end = p + strlen (p) - 1; + while (isspace (*end)) + end--; + if (*end == ':') + { + vString *name = vStringNew (); + vStringNCatS (name, p, end - p); + makeSimpleTag (name, BasicKinds, K_LABEL); + vStringDelete (name); + } +} + +/* Match a ".label" style label. */ +static void match_dot_label (char const *p) +{ + if (*p == '.') + { + vString *name = vStringNew (); + extract_name (p + 1, name); + makeSimpleTag (name, BasicKinds, K_LABEL); + vStringDelete (name); + } +} + +static void findBasicTags (void) +{ + const char *line; + const char *extension = fileExtension (vStringValue (File.name)); + KeyWord *keywords; + + if (strcmp (extension, "bb") == 0) + keywords = blitzbasic_keywords; + else if (strcmp (extension, "pb") == 0) + keywords = purebasic_keywords; + else + keywords = freebasic_keywords; + + while ((line = (const char *) fileReadLine ()) != NULL) + { + const char *p = line; + KeyWord const *kw; + + while (isspace (*p)) + p++; + + /* Empty line? */ + if (!*p) + continue; + + /* In Basic, keywords always are at the start of the line. */ + for (kw = keywords; kw->token; kw++) + if (match_keyword (p, kw)) break; + + /* Is it a label? 
*/ + if (strcmp (extension, "bb") == 0) + match_dot_label (p); + else + match_colon_label (p); + } +} + +parserDefinition *BasicParser (void) +{ + static char const *extensions[] = { "bas", "bi", "bb", "pb", NULL }; + parserDefinition *def = parserNew ("Basic"); + def->kinds = BasicKinds; + def->kindCount = KIND_COUNT (BasicKinds); + def->extensions = extensions; + def->parser = findBasicTags; + return def; +} + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/third_party/ctags/beta.c b/third_party/ctags/beta.c new file mode 100644 index 000000000..168ea0b24 --- /dev/null +++ b/third_party/ctags/beta.c @@ -0,0 +1,323 @@ +// clang-format off +/* +* $Id: beta.c 536 2007-06-02 06:09:00Z elliotth $ +* +* Copyright (c) 1999-2000, Mjolner Informatics +* +* Written by Erik Corry +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains functions for generating tags for BETA language +* files. +*/ + +/* +* INCLUDE FILES +*/ +#include "third_party/ctags/general.h" /* must always come first */ + +#include "libc/mem/alg.h" +#include "libc/str/str.h" + +#include "third_party/ctags/entry.h" +#include "third_party/ctags/parse.h" +#include "third_party/ctags/read.h" +#include "third_party/ctags/routines.h" +#include "third_party/ctags/vstring.h" + +/* +* MACROS +*/ +#define isbident(c) (identarray [(unsigned char) (c)]) + +/* +* DATA DEFINITIONS +*/ +typedef enum { + K_FRAGMENT, K_PATTERN, K_SLOT, K_VIRTUAL +} betaKind; + +static kindOption BetaKinds [] = { + { TRUE, 'f', "fragment", "fragment definitions"}, + { FALSE, 'p', "pattern", "all patterns"}, + { TRUE, 's', "slot", "slots (fragment uses)"}, + { TRUE, 'v', "virtual", "patterns (virtual or rebound)"} +}; + +/* [A-Z_a-z0-9] */ +static const char identarray [256] = { +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0-15 */ +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16-31 */ +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 32-47 
!"#$%&'()*+'-./ */ +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 48-63 0123456789:;<=>? */ +0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 64-79 @ABCDEFGHIJKLMNO */ +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 80-95 PQRSTUVWXYZ [\]^_ */ +0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 96-111 `abcdefghijklmno */ +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 112-127 pqrstuvwxyz{|}~ */ +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 128- */ +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* -255 */ + +/* +* FUNCTION DEFINITIONS +*/ + +static void makeBetaTag (const char* const name, const betaKind kind) +{ + if (BetaKinds [kind].enabled) + { + tagEntryInfo e; + initTagEntry (&e, name); + e.kindName = BetaKinds [kind].name; + e.kind = BetaKinds [kind].letter; + makeTagEntry (&e); + } +} + +static void findBetaTags (void) +{ + vString *line = vStringNew (); + boolean incomment = FALSE; + boolean inquote = FALSE; + boolean dovirtuals = BetaKinds [K_VIRTUAL].enabled; + boolean dopatterns = BetaKinds [K_PATTERN].enabled; + + do + { + boolean foundfragmenthere = FALSE; + /* find fragment definition (line that starts and ends with --) */ + int last; + int first; + int c; + + vStringClear (line); + + while ((c = fileGetc ()) != EOF && c != '\n' && c != '\r') + vStringPut (line, c); + + vStringTerminate (line); + + last = vStringLength (line) - 1; + first = 0; + /* skip white space at start and end of line */ + while (last && isspace ((int) vStringChar (line, last))) last--; + while (first < last && isspace ((int) vStringChar (line, first))) first++; + /* if line still has a reasonable length and ... 
*/ + if (last - first > 4 && + (vStringChar (line, first) == '-' && + vStringChar (line, first + 1) == '-' && + vStringChar (line, last) == '-' && + vStringChar (line, last - 1) == '-')) + { + if (!incomment && !inquote) + { + foundfragmenthere = TRUE; + /* skip past -- and whitespace. Also skip back past 'dopart' + or 'attributes' to the :. We have to do this because there + is no sensible way to include whitespace in a ctags token + so the conventional space after the ':' would mess us up */ + last -= 2; + first += 2; + while (last && vStringChar (line, last) != ':') last--; + while (last && (isspace ((int) vStringChar (line, last-1)))) last--; + while (first < last && + (isspace ((int) vStringChar (line, first)) || + vStringChar (line, first) == '-')) + first++; + /* If there's anything left it is a fragment title */ + if (first < last - 1) + { + vStringChar (line, last) = 0; + if (strcasecmp ("LIB", vStringValue (line) + first) && + strcasecmp ("PROGRAM", vStringValue (line) + first)) + { + makeBetaTag (vStringValue (line) + first, K_FRAGMENT); + } + } + } + } else { + int pos = 0; + int len = vStringLength (line); + if (inquote) goto stringtext; + if (incomment) goto commenttext; + programtext: + for ( ; pos < len; pos++) + { + if (vStringChar (line, pos) == '\'') + { + pos++; + inquote = TRUE; + goto stringtext; + } + if (vStringChar (line, pos) == '{') + { + pos++; + incomment = TRUE; + goto commenttext; + } + if (vStringChar (line, pos) == '(' && pos < len - 1 && + vStringChar (line, pos+1) == '*') + { + pos +=2; + incomment = TRUE; + goto commenttext; + } + /* + * SLOT definition looks like this: + * <> + * or + * <> + */ + if (!foundfragmenthere && + vStringChar (line, pos) == '<' && + pos+1 < len && + vStringChar (line, pos+1) == '<' && + strstr (vStringValue (line) + pos, ">>")) + { + /* Found slot name, get start and end */ + int eoname; + char c2; + pos += 2; /* skip past << */ + /* skip past space before SLOT */ + while (pos < len && isspace ((int) 
vStringChar (line, pos))) + pos++; + /* skip past SLOT */ + if (pos+4 <= len && + !strncasecmp (vStringValue(line) + pos, "SLOT", (size_t)4)) + pos += 4; + /* skip past space after SLOT */ + while (pos < len && isspace ((int) vStringChar (line, pos))) + pos++; + eoname = pos; + /* skip to end of name */ + while (eoname < len && + (c2 = vStringChar (line, eoname)) != '>' && + c2 != ':' && + !isspace ((int) c2)) + eoname++; + if (eoname < len) + { + vStringChar (line, eoname) = 0; + if (strcasecmp ("LIB", vStringValue (line) + pos) && + strcasecmp ("PROGRAM", vStringValue (line) + pos) && + strcasecmp ("SLOT", vStringValue (line) + pos)) + { + makeBetaTag (vStringValue (line) + pos, K_SLOT); + } + } + if (eoname+1 < len) { + pos = eoname + 1; + } else { + pos = len; + continue; + } + } + /* Only patterns that are virtual, extensions of virtuals or + * final bindings are normally included so as not to overload + * totally. + * That means one of the forms name:: name:< or name::< + */ + if (!foundfragmenthere && + vStringChar (line, pos) == ':' && + (dopatterns || + (dovirtuals && + (vStringChar (line, pos+1) == ':' || + vStringChar (line, pos+1) == '<') + ) + ) + ) + { + /* Found pattern name, get start and end */ + int eoname = pos; + int soname; + while (eoname && isspace ((int) vStringChar (line, eoname-1))) + eoname--; + foundanothername: + /* terminate right after name */ + vStringChar (line, eoname) = 0; + soname = eoname; + while (soname && + isbident (vStringChar (line, soname-1))) + { + soname--; + } + if (soname != eoname) + { + makeBetaTag (vStringValue (line) + soname, K_PATTERN); + /* scan back past white space */ + while (soname && + isspace ((int) vStringChar (line, soname-1))) + soname--; + if (soname && vStringChar (line, soname-1) == ',') + { + /* we found a new pattern name before comma */ + eoname = soname; + goto foundanothername; + } + } + } + } + goto endofline; + commenttext: + for ( ; pos < len; pos++) + { + if (vStringChar (line, pos) == '*' 
&& pos < len - 1 && + vStringChar (line, pos+1) == ')') + { + pos += 2; + incomment = FALSE; + goto programtext; + } + if (vStringChar (line, pos) == '}') + { + pos++; + incomment = FALSE; + goto programtext; + } + } + goto endofline; + stringtext: + for ( ; pos < len; pos++) + { + if (vStringChar (line, pos) == '\\') + { + if (pos < len - 1) pos++; + } + else if (vStringChar (line, pos) == '\'') + { + pos++; + /* support obsolete '' syntax */ + if (pos < len && vStringChar (line, pos) == '\'') + { + continue; + } + inquote = FALSE; + goto programtext; + } + } + } + endofline: + inquote = FALSE; /* This shouldn't really make a difference */ + } while (!feof (File.fp)); + vStringDelete (line); +} + +extern parserDefinition* BetaParser (void) +{ + static const char *const extensions [] = { "bet", NULL }; + parserDefinition* def = parserNew ("BETA"); + def->kinds = BetaKinds; + def->kindCount = KIND_COUNT (BetaKinds); + def->extensions = extensions; + def->parser = findBetaTags; + return def; +} + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/third_party/ctags/c.c b/third_party/ctags/c.c new file mode 100644 index 000000000..515d3d1e3 --- /dev/null +++ b/third_party/ctags/c.c @@ -0,0 +1,2934 @@ +// clang-format off +/* +* $Id: c.c 689 2008-12-13 21:17:36Z elliotth $ +* +* Copyright (c) 1996-2003, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains functions for parsing and scanning C, C++ and Java +* source files. 
+*/ + +/* +* INCLUDE FILES +*/ +#include "third_party/ctags/general.h" /* must always come first */ + +#include "libc/mem/alg.h" +#include "libc/str/str.h" +#include "libc/runtime/runtime.h" + +#include "third_party/ctags/debug.h" +#include "third_party/ctags/entry.h" +#include "third_party/ctags/get.h" +#include "third_party/ctags/keyword.h" +#include "third_party/ctags/options.h" +#include "third_party/ctags/parse.h" +#include "third_party/ctags/read.h" +#include "third_party/ctags/routines.h" + +/* +* MACROS +*/ + +#define activeToken(st) ((st)->token [(int) (st)->tokenIndex]) +#define parentDecl(st) ((st)->parent == NULL ? \ + DECL_NONE : (st)->parent->declaration) +#define isType(token,t) (boolean) ((token)->type == (t)) +#define insideEnumBody(st) ((st)->parent == NULL ? FALSE : \ + (boolean) ((st)->parent->declaration == DECL_ENUM)) +#define isExternCDecl(st,c) (boolean) ((c) == STRING_SYMBOL && \ + ! (st)->haveQualifyingName && (st)->scope == SCOPE_EXTERN) + +#define isOneOf(c,s) (boolean) (strchr ((s), (c)) != NULL) /* note: also true when c is 0, because strchr then finds the terminator of s */ + +#define isHighChar(c) ((c) != EOF && (unsigned char)(c) >= 0xc0) /* true for a non-EOF value whose low byte is 0xc0 or above */ + +/* +* DATA DECLARATIONS +*/ + +enum { NumTokens = 3 }; /* number of slots in statementInfo.token[], the array that activeToken() indexes with tokenIndex */ + +typedef enum eException { + ExceptionNone, ExceptionEOF, ExceptionFormattingError, + ExceptionBraceFormattingError +} exception_t; /* NOTE(review): presumably the codes passed through the jmp_buf `Exception' declared further down - confirm at the setjmp/longjmp call sites */ + +/* Used to specify type of keyword. 
+ */ +typedef enum eKeywordId { + KEYWORD_NONE = -1, /* the only negative id; the list below therefore counts up from KEYWORD_ATTRIBUTE == 0 */ + KEYWORD_ATTRIBUTE, KEYWORD_ABSTRACT, + KEYWORD_BOOLEAN, KEYWORD_BYTE, KEYWORD_BAD_STATE, KEYWORD_BAD_TRANS, + KEYWORD_BIND, KEYWORD_BIND_VAR, KEYWORD_BIT, + KEYWORD_CASE, KEYWORD_CATCH, KEYWORD_CHAR, KEYWORD_CLASS, KEYWORD_CONST, + KEYWORD_CONSTRAINT, KEYWORD_COVERAGE_BLOCK, KEYWORD_COVERAGE_DEF, + KEYWORD_DEFAULT, KEYWORD_DELEGATE, KEYWORD_DELETE, KEYWORD_DO, + KEYWORD_DOUBLE, + KEYWORD_ELSE, KEYWORD_ENUM, KEYWORD_EXPLICIT, KEYWORD_EXTERN, + KEYWORD_EXTENDS, KEYWORD_EVENT, + KEYWORD_FINAL, KEYWORD_FLOAT, KEYWORD_FOR, KEYWORD_FOREACH, + KEYWORD_FRIEND, KEYWORD_FUNCTION, + KEYWORD_GOTO, + KEYWORD_IF, KEYWORD_IMPLEMENTS, KEYWORD_IMPORT, KEYWORD_INLINE, KEYWORD_INT, + KEYWORD_INOUT, KEYWORD_INPUT, KEYWORD_INTEGER, KEYWORD_INTERFACE, + KEYWORD_INTERNAL, + KEYWORD_LOCAL, KEYWORD_LONG, + KEYWORD_M_BAD_STATE, KEYWORD_M_BAD_TRANS, KEYWORD_M_STATE, KEYWORD_M_TRANS, + KEYWORD_MUTABLE, + KEYWORD_NAMESPACE, KEYWORD_NEW, KEYWORD_NEWCOV, KEYWORD_NATIVE, + KEYWORD_OPERATOR, KEYWORD_OUTPUT, KEYWORD_OVERLOAD, KEYWORD_OVERRIDE, + KEYWORD_PACKED, KEYWORD_PORT, KEYWORD_PACKAGE, KEYWORD_PRIVATE, + KEYWORD_PROGRAM, KEYWORD_PROTECTED, KEYWORD_PUBLIC, + KEYWORD_REGISTER, KEYWORD_RETURN, + KEYWORD_SHADOW, KEYWORD_STATE, + KEYWORD_SHORT, KEYWORD_SIGNED, KEYWORD_STATIC, KEYWORD_STRING, + KEYWORD_STRUCT, KEYWORD_SWITCH, KEYWORD_SYNCHRONIZED, + KEYWORD_TASK, KEYWORD_TEMPLATE, KEYWORD_THIS, KEYWORD_THROW, + KEYWORD_THROWS, KEYWORD_TRANSIENT, KEYWORD_TRANS, KEYWORD_TRANSITION, + KEYWORD_TRY, KEYWORD_TYPEDEF, KEYWORD_TYPENAME, + KEYWORD_UINT, KEYWORD_ULONG, KEYWORD_UNION, KEYWORD_UNSIGNED, KEYWORD_USHORT, + KEYWORD_USING, + KEYWORD_VIRTUAL, KEYWORD_VOID, KEYWORD_VOLATILE, + KEYWORD_WCHAR_T, KEYWORD_WHILE +} keywordId; /* also the type of tokenInfo.keyword */ + +/* Used to determine whether keyword is valid for the current language and + * what its ID is. 
+ */ +typedef struct sKeywordDesc { + const char *name; + keywordId id; + short isValid [5]; /* indicates languages for which kw is valid */ /* NOTE(review): the 5 presumably corresponds to the Lang_c, Lang_cpp, Lang_csharp, Lang_java and Lang_vera variables declared further down - confirm the column order where the keyword table is defined */ +} keywordDesc; + +/* Used for reporting the type of object parsed by nextToken (). + */ +typedef enum eTokenType { + TOKEN_NONE, /* none */ + TOKEN_ARGS, /* a parenthetical pair and its contents */ + TOKEN_BRACE_CLOSE, + TOKEN_BRACE_OPEN, + TOKEN_COLON, /* the colon character */ + TOKEN_COMMA, /* the comma character */ + TOKEN_DOUBLE_COLON, /* double colon indicates nested-name-specifier */ + TOKEN_KEYWORD, + TOKEN_NAME, /* an unknown name */ + TOKEN_PACKAGE, /* a Java package name */ + TOKEN_PAREN_NAME, /* a single name in parentheses */ + TOKEN_SEMICOLON, /* the semicolon character */ + TOKEN_SPEC, /* a storage class specifier, qualifier, type, etc. */ + TOKEN_COUNT +} tokenType; /* type of tokenInfo.type, which the isType() macro compares against */ + +/* This describes the scoping of the current statement. + */ +typedef enum eTagScope { + SCOPE_GLOBAL, /* no storage class specified */ + SCOPE_STATIC, /* static storage class */ + SCOPE_EXTERN, /* external storage class */ + SCOPE_FRIEND, /* declares access only */ + SCOPE_TYPEDEF, /* scoping depends upon context */ + SCOPE_COUNT +} tagScope; + +typedef enum eDeclaration { + DECL_NONE, /* what parentDecl() yields for a statement without a parent */ + DECL_BASE, /* base type (default) */ + DECL_CLASS, + DECL_ENUM, + DECL_EVENT, + DECL_FUNCTION, + DECL_IGNORE, /* non-taggable "declaration" */ + DECL_INTERFACE, + DECL_NAMESPACE, + DECL_NOMANGLE, /* C++ name demangling block */ + DECL_PACKAGE, + DECL_PROGRAM, /* Vera program */ + DECL_STRUCT, + DECL_TASK, /* Vera task */ + DECL_UNION, + DECL_COUNT +} declType; + +typedef enum eVisibilityType { + ACCESS_UNDEFINED, + ACCESS_LOCAL, + ACCESS_PRIVATE, + ACCESS_PROTECTED, + ACCESS_PUBLIC, + ACCESS_DEFAULT, /* Java-specific */ + ACCESS_COUNT +} accessType; + +/* Information about the parent class of a member (if any). 
+ */ +typedef struct sMemberInfo { + accessType access; /* access of current statement */ + accessType accessDefault; /* access default for current statement */ +} memberInfo; + +typedef struct sTokenInfo { + tokenType type; + keywordId keyword; + vString* name; /* the name of the token */ + unsigned long lineNumber; /* line number of tag */ + fpos_t filePosition; /* file position of line containing name */ +} tokenInfo; + +typedef enum eImplementation { + IMP_DEFAULT, + IMP_ABSTRACT, + IMP_VIRTUAL, + IMP_PURE_VIRTUAL, + IMP_COUNT +} impType; + +/* Describes the statement currently undergoing analysis. + */ +typedef struct sStatementInfo { + tagScope scope; + declType declaration; /* specifier associated with TOKEN_SPEC */ + boolean gotName; /* was a name parsed yet? */ + boolean haveQualifyingName; /* do we have a name we are considering? */ + boolean gotParenName; /* was a name inside parentheses parsed yet? */ + boolean gotArgs; /* was a list of parameters parsed yet? */ + boolean isPointer; /* is 'name' a pointer? */ + boolean inFunction; /* are we inside of a function? */ + boolean assignment; /* have we handled an '='? */ + boolean notVariable; /* has a variable declaration been disqualified ? */ + impType implementation; /* abstract or concrete implementation? */ + unsigned int tokenIndex; /* currently active token */ + tokenInfo* token [(int) NumTokens]; + tokenInfo* context; /* accumulated scope of current statement */ + tokenInfo* blockName; /* name of current block */ + memberInfo member; /* information regarding parent class/struct */ + vString* parentClasses; /* parent classes */ + struct sStatementInfo *parent; /* statement we are nested within */ +} statementInfo; + +/* Describes the type of tag being generated. 
+ */ +typedef enum eTagType { + TAG_UNDEFINED, + TAG_CLASS, /* class name */ + TAG_ENUM, /* enumeration name */ + TAG_ENUMERATOR, /* enumerator (enumeration value) */ + TAG_EVENT, /* event */ + TAG_FIELD, /* field (Java) */ + TAG_FUNCTION, /* function definition */ + TAG_INTERFACE, /* interface declaration */ + TAG_LOCAL, /* local variable definition */ + TAG_MEMBER, /* structure, class or interface member */ + TAG_METHOD, /* method declaration */ + TAG_NAMESPACE, /* namespace name */ + TAG_PACKAGE, /* package name */ + TAG_PROGRAM, /* program name */ + TAG_PROPERTY, /* property name */ + TAG_PROTOTYPE, /* function prototype or declaration */ + TAG_STRUCT, /* structure name */ + TAG_TASK, /* task name */ + TAG_TYPEDEF, /* typedef name */ + TAG_UNION, /* union name */ + TAG_VARIABLE, /* variable definition */ + TAG_EXTERN_VAR, /* external variable declaration */ + TAG_COUNT /* must be last */ +} tagType; + +typedef struct sParenInfo { + boolean isPointer; + boolean isParamList; + boolean isKnrParamList; + boolean isNameCandidate; + boolean invalidContents; + boolean nestedArgs; + unsigned int parameterCount; +} parenInfo; + +/* +* DATA DEFINITIONS +*/ + +static jmp_buf Exception; + +static langType Lang_c; +static langType Lang_cpp; +static langType Lang_csharp; +static langType Lang_java; +static langType Lang_vera; +static vString *Signature; +static boolean CollectingSignature; + +/* Number used to uniquely identify anonymous structs and unions. */ +static int AnonymousID = 0; + +/* Used to index into the CKinds table. 
*/ +typedef enum { + CK_UNDEFINED = -1, + CK_CLASS, CK_DEFINE, CK_ENUMERATOR, CK_FUNCTION, + CK_ENUMERATION, CK_LOCAL, CK_MEMBER, CK_NAMESPACE, CK_PROTOTYPE, + CK_STRUCT, CK_TYPEDEF, CK_UNION, CK_VARIABLE, + CK_EXTERN_VARIABLE +} cKind; + +static kindOption CKinds [] = { + { TRUE, 'c', "class", "classes"}, + { TRUE, 'd', "macro", "macro definitions"}, + { TRUE, 'e', "enumerator", "enumerators (values inside an enumeration)"}, + { TRUE, 'f', "function", "function definitions"}, + { TRUE, 'g', "enum", "enumeration names"}, + { FALSE, 'l', "local", "local variables"}, + { TRUE, 'm', "member", "class, struct, and union members"}, + { TRUE, 'n', "namespace", "namespaces"}, + { FALSE, 'p', "prototype", "function prototypes"}, + { TRUE, 's', "struct", "structure names"}, + { TRUE, 't', "typedef", "typedefs"}, + { TRUE, 'u', "union", "union names"}, + { TRUE, 'v', "variable", "variable definitions"}, + { FALSE, 'x', "externvar", "external and forward variable declarations"}, +}; + +typedef enum { + CSK_UNDEFINED = -1, + CSK_CLASS, CSK_DEFINE, CSK_ENUMERATOR, CSK_EVENT, CSK_FIELD, + CSK_ENUMERATION, CSK_INTERFACE, CSK_LOCAL, CSK_METHOD, + CSK_NAMESPACE, CSK_PROPERTY, CSK_STRUCT, CSK_TYPEDEF +} csharpKind; + +static kindOption CsharpKinds [] = { + { TRUE, 'c', "class", "classes"}, + { TRUE, 'd', "macro", "macro definitions"}, + { TRUE, 'e', "enumerator", "enumerators (values inside an enumeration)"}, + { TRUE, 'E', "event", "events"}, + { TRUE, 'f', "field", "fields"}, + { TRUE, 'g', "enum", "enumeration names"}, + { TRUE, 'i', "interface", "interfaces"}, + { FALSE, 'l', "local", "local variables"}, + { TRUE, 'm', "method", "methods"}, + { TRUE, 'n', "namespace", "namespaces"}, + { TRUE, 'p', "property", "properties"}, + { TRUE, 's', "struct", "structure names"}, + { TRUE, 't', "typedef", "typedefs"}, +}; + +/* Used to index into the JavaKinds table. 
*/ +typedef enum { + JK_UNDEFINED = -1, + JK_CLASS, JK_ENUM_CONSTANT, JK_FIELD, JK_ENUM, JK_INTERFACE, + JK_LOCAL, JK_METHOD, JK_PACKAGE, JK_ACCESS, JK_CLASS_PREFIX +} javaKind; + +static kindOption JavaKinds [] = { + { TRUE, 'c', "class", "classes"}, + { TRUE, 'e', "enum constant", "enum constants"}, + { TRUE, 'f', "field", "fields"}, + { TRUE, 'g', "enum", "enum types"}, + { TRUE, 'i', "interface", "interfaces"}, + { FALSE, 'l', "local", "local variables"}, + { TRUE, 'm', "method", "methods"}, + { TRUE, 'p', "package", "packages"}, +}; + +/* Used to index into the VeraKinds table. */ +typedef enum { + VK_UNDEFINED = -1, + VK_CLASS, VK_DEFINE, VK_ENUMERATOR, VK_FUNCTION, + VK_ENUMERATION, VK_LOCAL, VK_MEMBER, VK_PROGRAM, VK_PROTOTYPE, + VK_TASK, VK_TYPEDEF, VK_VARIABLE, + VK_EXTERN_VARIABLE +} veraKind; + +static kindOption VeraKinds [] = { + { TRUE, 'c', "class", "classes"}, + { TRUE, 'd', "macro", "macro definitions"}, + { TRUE, 'e', "enumerator", "enumerators (values inside an enumeration)"}, + { TRUE, 'f', "function", "function definitions"}, + { TRUE, 'g', "enum", "enumeration names"}, + { FALSE, 'l', "local", "local variables"}, + { TRUE, 'm', "member", "class, struct, and union members"}, + { TRUE, 'p', "program", "programs"}, + { FALSE, 'P', "prototype", "function prototypes"}, + { TRUE, 't', "task", "tasks"}, + { TRUE, 'T', "typedef", "typedefs"}, + { TRUE, 'v', "variable", "variable definitions"}, + { FALSE, 'x', "externvar", "external variable declarations"} +}; + +static const keywordDesc KeywordTable [] = { + /* C++ */ + /* ANSI C | C# Java */ + /* | | | | Vera */ + /* keyword keyword ID | | | | | */ + { "__attribute__", KEYWORD_ATTRIBUTE, { 1, 1, 1, 0, 0 } }, + { "abstract", KEYWORD_ABSTRACT, { 0, 0, 1, 1, 0 } }, + { "bad_state", KEYWORD_BAD_STATE, { 0, 0, 0, 0, 1 } }, + { "bad_trans", KEYWORD_BAD_TRANS, { 0, 0, 0, 0, 1 } }, + { "bind", KEYWORD_BIND, { 0, 0, 0, 0, 1 } }, + { "bind_var", KEYWORD_BIND_VAR, { 0, 0, 0, 0, 1 } }, + { "bit", KEYWORD_BIT, { 
0, 0, 0, 0, 1 } }, + { "boolean", KEYWORD_BOOLEAN, { 0, 0, 0, 1, 0 } }, + { "byte", KEYWORD_BYTE, { 0, 0, 0, 1, 0 } }, + { "case", KEYWORD_CASE, { 1, 1, 1, 1, 0 } }, + { "catch", KEYWORD_CATCH, { 0, 1, 1, 0, 0 } }, + { "char", KEYWORD_CHAR, { 1, 1, 1, 1, 0 } }, + { "class", KEYWORD_CLASS, { 0, 1, 1, 1, 1 } }, + { "const", KEYWORD_CONST, { 1, 1, 1, 1, 0 } }, + { "constraint", KEYWORD_CONSTRAINT, { 0, 0, 0, 0, 1 } }, + { "coverage_block", KEYWORD_COVERAGE_BLOCK, { 0, 0, 0, 0, 1 } }, + { "coverage_def", KEYWORD_COVERAGE_DEF, { 0, 0, 0, 0, 1 } }, + { "do", KEYWORD_DO, { 1, 1, 1, 1, 0 } }, + { "default", KEYWORD_DEFAULT, { 1, 1, 1, 1, 0 } }, + { "delegate", KEYWORD_DELEGATE, { 0, 0, 1, 0, 0 } }, + { "delete", KEYWORD_DELETE, { 0, 1, 0, 0, 0 } }, + { "double", KEYWORD_DOUBLE, { 1, 1, 1, 1, 0 } }, + { "else", KEYWORD_ELSE, { 1, 1, 1, 1, 0 } }, + { "enum", KEYWORD_ENUM, { 1, 1, 1, 1, 1 } }, + { "event", KEYWORD_EVENT, { 0, 0, 1, 0, 1 } }, + { "explicit", KEYWORD_EXPLICIT, { 0, 1, 1, 0, 0 } }, + { "extends", KEYWORD_EXTENDS, { 0, 0, 0, 1, 1 } }, + { "extern", KEYWORD_EXTERN, { 1, 1, 1, 0, 1 } }, + { "final", KEYWORD_FINAL, { 0, 0, 0, 1, 0 } }, + { "float", KEYWORD_FLOAT, { 1, 1, 1, 1, 0 } }, + { "for", KEYWORD_FOR, { 1, 1, 1, 1, 0 } }, + { "foreach", KEYWORD_FOREACH, { 0, 0, 1, 0, 0 } }, + { "friend", KEYWORD_FRIEND, { 0, 1, 0, 0, 0 } }, + { "function", KEYWORD_FUNCTION, { 0, 0, 0, 0, 1 } }, + { "goto", KEYWORD_GOTO, { 1, 1, 1, 1, 0 } }, + { "if", KEYWORD_IF, { 1, 1, 1, 1, 0 } }, + { "implements", KEYWORD_IMPLEMENTS, { 0, 0, 0, 1, 0 } }, + { "import", KEYWORD_IMPORT, { 0, 0, 0, 1, 0 } }, + { "inline", KEYWORD_INLINE, { 0, 1, 0, 0, 0 } }, + { "inout", KEYWORD_INOUT, { 0, 0, 0, 0, 1 } }, + { "input", KEYWORD_INPUT, { 0, 0, 0, 0, 1 } }, + { "int", KEYWORD_INT, { 1, 1, 1, 1, 0 } }, + { "integer", KEYWORD_INTEGER, { 0, 0, 0, 0, 1 } }, + { "interface", KEYWORD_INTERFACE, { 0, 0, 1, 1, 1 } }, + { "internal", KEYWORD_INTERNAL, { 0, 0, 1, 0, 0 } }, + { "local", KEYWORD_LOCAL, { 0, 
0, 0, 0, 1 } }, + { "long", KEYWORD_LONG, { 1, 1, 1, 1, 0 } }, + { "m_bad_state", KEYWORD_M_BAD_STATE, { 0, 0, 0, 0, 1 } }, + { "m_bad_trans", KEYWORD_M_BAD_TRANS, { 0, 0, 0, 0, 1 } }, + { "m_state", KEYWORD_M_STATE, { 0, 0, 0, 0, 1 } }, + { "m_trans", KEYWORD_M_TRANS, { 0, 0, 0, 0, 1 } }, + { "mutable", KEYWORD_MUTABLE, { 0, 1, 0, 0, 0 } }, + { "namespace", KEYWORD_NAMESPACE, { 0, 1, 1, 0, 0 } }, + { "native", KEYWORD_NATIVE, { 0, 0, 0, 1, 0 } }, + { "new", KEYWORD_NEW, { 0, 1, 1, 1, 0 } }, + { "newcov", KEYWORD_NEWCOV, { 0, 0, 0, 0, 1 } }, + { "operator", KEYWORD_OPERATOR, { 0, 1, 1, 0, 0 } }, + { "output", KEYWORD_OUTPUT, { 0, 0, 0, 0, 1 } }, + { "overload", KEYWORD_OVERLOAD, { 0, 1, 0, 0, 0 } }, + { "override", KEYWORD_OVERRIDE, { 0, 0, 1, 0, 0 } }, + { "package", KEYWORD_PACKAGE, { 0, 0, 0, 1, 0 } }, + { "packed", KEYWORD_PACKED, { 0, 0, 0, 0, 1 } }, + { "port", KEYWORD_PORT, { 0, 0, 0, 0, 1 } }, + { "private", KEYWORD_PRIVATE, { 0, 1, 1, 1, 0 } }, + { "program", KEYWORD_PROGRAM, { 0, 0, 0, 0, 1 } }, + { "protected", KEYWORD_PROTECTED, { 0, 1, 1, 1, 1 } }, + { "public", KEYWORD_PUBLIC, { 0, 1, 1, 1, 1 } }, + { "register", KEYWORD_REGISTER, { 1, 1, 0, 0, 0 } }, + { "return", KEYWORD_RETURN, { 1, 1, 1, 1, 0 } }, + { "shadow", KEYWORD_SHADOW, { 0, 0, 0, 0, 1 } }, + { "short", KEYWORD_SHORT, { 1, 1, 1, 1, 0 } }, + { "signed", KEYWORD_SIGNED, { 1, 1, 0, 0, 0 } }, + { "state", KEYWORD_STATE, { 0, 0, 0, 0, 1 } }, + { "static", KEYWORD_STATIC, { 1, 1, 1, 1, 1 } }, + { "string", KEYWORD_STRING, { 0, 0, 1, 0, 1 } }, + { "struct", KEYWORD_STRUCT, { 1, 1, 1, 0, 0 } }, + { "switch", KEYWORD_SWITCH, { 1, 1, 1, 1, 0 } }, + { "synchronized", KEYWORD_SYNCHRONIZED, { 0, 0, 0, 1, 0 } }, + { "task", KEYWORD_TASK, { 0, 0, 0, 0, 1 } }, + { "template", KEYWORD_TEMPLATE, { 0, 1, 0, 0, 0 } }, + { "this", KEYWORD_THIS, { 0, 1, 1, 1, 0 } }, + { "throw", KEYWORD_THROW, { 0, 1, 1, 1, 0 } }, + { "throws", KEYWORD_THROWS, { 0, 0, 0, 1, 0 } }, + { "trans", KEYWORD_TRANS, { 0, 0, 0, 0, 1 } }, 
+ { "transition", KEYWORD_TRANSITION, { 0, 0, 0, 0, 1 } }, + { "transient", KEYWORD_TRANSIENT, { 0, 0, 0, 1, 0 } }, + { "try", KEYWORD_TRY, { 0, 1, 1, 0, 0 } }, + { "typedef", KEYWORD_TYPEDEF, { 1, 1, 1, 0, 1 } }, + { "typename", KEYWORD_TYPENAME, { 0, 1, 0, 0, 0 } }, + { "uint", KEYWORD_UINT, { 0, 0, 1, 0, 0 } }, + { "ulong", KEYWORD_ULONG, { 0, 0, 1, 0, 0 } }, + { "union", KEYWORD_UNION, { 1, 1, 0, 0, 0 } }, + { "unsigned", KEYWORD_UNSIGNED, { 1, 1, 1, 0, 0 } }, + { "ushort", KEYWORD_USHORT, { 0, 0, 1, 0, 0 } }, + { "using", KEYWORD_USING, { 0, 1, 1, 0, 0 } }, + { "virtual", KEYWORD_VIRTUAL, { 0, 1, 1, 0, 1 } }, + { "void", KEYWORD_VOID, { 1, 1, 1, 1, 1 } }, + { "volatile", KEYWORD_VOLATILE, { 1, 1, 1, 1, 0 } }, + { "wchar_t", KEYWORD_WCHAR_T, { 1, 1, 1, 0, 0 } }, + { "while", KEYWORD_WHILE, { 1, 1, 1, 1, 0 } } +}; + +/* +* FUNCTION PROTOTYPES +*/ +static void createTags (const unsigned int nestLevel, statementInfo *const parent); + +/* +* FUNCTION DEFINITIONS +*/ + +extern boolean includingDefineTags (void) +{ + return CKinds [CK_DEFINE].enabled; +} + +/* +* Token management +*/ + +static void initToken (tokenInfo* const token) +{ + token->type = TOKEN_NONE; + token->keyword = KEYWORD_NONE; + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + vStringClear (token->name); +} + +static void advanceToken (statementInfo* const st) +{ + if (st->tokenIndex >= (unsigned int) NumTokens - 1) + st->tokenIndex = 0; + else + ++st->tokenIndex; + initToken (st->token [st->tokenIndex]); +} + +static tokenInfo *prevToken (const statementInfo *const st, unsigned int n) +{ + unsigned int tokenIndex; + unsigned int num = (unsigned int) NumTokens; + Assert (n < num); + tokenIndex = (st->tokenIndex + num - n) % num; + return st->token [tokenIndex]; +} + +static void setToken (statementInfo *const st, const tokenType type) +{ + tokenInfo *token; + token = activeToken (st); + initToken (token); + token->type = type; +} + +static void 
retardToken (statementInfo *const st) +{ + if (st->tokenIndex == 0) + st->tokenIndex = (unsigned int) NumTokens - 1; + else + --st->tokenIndex; + setToken (st, TOKEN_NONE); +} + +static tokenInfo *newToken (void) +{ + tokenInfo *const token = xMalloc (1, tokenInfo); + token->name = vStringNew (); + initToken (token); + return token; +} + +static void deleteToken (tokenInfo *const token) +{ + if (token != NULL) + { + vStringDelete (token->name); + eFree (token); + } +} + +static const char *accessString (const accessType access) +{ + static const char *const names [] = { + "?", "local", "private", "protected", "public", "default" + }; + Assert (sizeof (names) / sizeof (names [0]) == ACCESS_COUNT); + Assert ((int) access < ACCESS_COUNT); + return names [(int) access]; +} + +static const char *implementationString (const impType imp) +{ + static const char *const names [] ={ + "?", "abstract", "virtual", "pure virtual" + }; + Assert (sizeof (names) / sizeof (names [0]) == IMP_COUNT); + Assert ((int) imp < IMP_COUNT); + return names [(int) imp]; +} + +/* +* Debugging functions +*/ + +#ifdef DEBUG + +#define boolString(c) ((c) ? 
"TRUE" : "FALSE") + +static const char *tokenString (const tokenType type) +{ + static const char *const names [] = { + "none", "args", "}", "{", "colon", "comma", "double colon", "keyword", + "name", "package", "paren-name", "semicolon", "specifier" + }; + Assert (sizeof (names) / sizeof (names [0]) == TOKEN_COUNT); + Assert ((int) type < TOKEN_COUNT); + return names [(int) type]; +} + +static const char *scopeString (const tagScope scope) +{ + static const char *const names [] = { + "global", "static", "extern", "friend", "typedef" + }; + Assert (sizeof (names) / sizeof (names [0]) == SCOPE_COUNT); + Assert ((int) scope < SCOPE_COUNT); + return names [(int) scope]; +} + +static const char *declString (const declType declaration) +{ + static const char *const names [] = { + "?", "base", "class", "enum", "event", "function", "ignore", + "interface", "namespace", "no mangle", "package", "program", + "struct", "task", "union", + }; + Assert (sizeof (names) / sizeof (names [0]) == DECL_COUNT); + Assert ((int) declaration < DECL_COUNT); + return names [(int) declaration]; +} + +static const char *keywordString (const keywordId keyword) +{ + const size_t count = sizeof (KeywordTable) / sizeof (KeywordTable [0]); + const char *name = "none"; + size_t i; + for (i = 0 ; i < count ; ++i) + { + const keywordDesc *p = &KeywordTable [i]; + if (p->id == keyword) + { + name = p->name; + break; + } + } + return name; +} + +static void __unused pt (tokenInfo *const token) +{ + if (isType (token, TOKEN_NAME)) + printf ("type: %-12s: %-13s line: %lu\n", + tokenString (token->type), vStringValue (token->name), + token->lineNumber); + else if (isType (token, TOKEN_KEYWORD)) + printf ("type: %-12s: %-13s line: %lu\n", + tokenString (token->type), keywordString (token->keyword), + token->lineNumber); + else + printf ("type: %-12s line: %lu\n", + tokenString (token->type), token->lineNumber); +} + +static void __unused ps (statementInfo *const st) +{ + unsigned int i; + printf ("scope: 
%s decl: %s gotName: %s gotParenName: %s\n", + scopeString (st->scope), declString (st->declaration), + boolString (st->gotName), boolString (st->gotParenName)); + printf ("haveQualifyingName: %s\n", boolString (st->haveQualifyingName)); + printf ("access: %s default: %s\n", accessString (st->member.access), + accessString (st->member.accessDefault)); + printf ("token : "); + pt (activeToken (st)); + for (i = 1 ; i < (unsigned int) NumTokens ; ++i) + { + printf ("prev %u : ", i); + pt (prevToken (st, i)); + } + printf ("context: "); + pt (st->context); +} + +#endif + +/* +* Statement management +*/ + +static boolean isContextualKeyword (const tokenInfo *const token) +{ + boolean result; + switch (token->keyword) + { + case KEYWORD_CLASS: + case KEYWORD_ENUM: + case KEYWORD_INTERFACE: + case KEYWORD_NAMESPACE: + case KEYWORD_STRUCT: + case KEYWORD_UNION: + result = TRUE; + break; + + default: result = FALSE; break; + } + return result; +} + +static boolean isContextualStatement (const statementInfo *const st) +{ + boolean result = FALSE; + if (st != NULL) switch (st->declaration) + { + case DECL_CLASS: + case DECL_ENUM: + case DECL_INTERFACE: + case DECL_NAMESPACE: + case DECL_STRUCT: + case DECL_UNION: + result = TRUE; + break; + + default: result = FALSE; break; + } + return result; +} + +static boolean isMember (const statementInfo *const st) +{ + boolean result; + if (isType (st->context, TOKEN_NAME)) + result = TRUE; + else + result = (boolean) + (st->parent != NULL && isContextualStatement (st->parent)); + return result; +} + +static void initMemberInfo (statementInfo *const st) +{ + accessType accessDefault = ACCESS_UNDEFINED; + + if (st->parent != NULL) switch (st->parent->declaration) + { + case DECL_ENUM: + accessDefault = (isLanguage (Lang_java) ? 
ACCESS_PUBLIC : ACCESS_UNDEFINED); + break; + case DECL_NAMESPACE: + accessDefault = ACCESS_UNDEFINED; + break; + + case DECL_CLASS: + if (isLanguage (Lang_java)) + accessDefault = ACCESS_DEFAULT; + else + accessDefault = ACCESS_PRIVATE; + break; + + case DECL_INTERFACE: + case DECL_STRUCT: + case DECL_UNION: + accessDefault = ACCESS_PUBLIC; + break; + + default: break; + } + st->member.accessDefault = accessDefault; + st->member.access = accessDefault; +} + +static void reinitStatement (statementInfo *const st, const boolean partial) +{ + unsigned int i; + + if (! partial) + { + st->scope = SCOPE_GLOBAL; + if (isContextualStatement (st->parent)) + st->declaration = DECL_BASE; + else + st->declaration = DECL_NONE; + } + st->gotParenName = FALSE; + st->isPointer = FALSE; + st->inFunction = FALSE; + st->assignment = FALSE; + st->notVariable = FALSE; + st->implementation = IMP_DEFAULT; + st->gotArgs = FALSE; + st->gotName = FALSE; + st->haveQualifyingName = FALSE; + st->tokenIndex = 0; + + if (st->parent != NULL) + st->inFunction = st->parent->inFunction; + + for (i = 0 ; i < (unsigned int) NumTokens ; ++i) + initToken (st->token [i]); + + initToken (st->context); + + /* Keep the block name, so that a variable following after a comma will + * still have the structure name. + */ + if (! partial) + initToken (st->blockName); + + vStringClear (st->parentClasses); + + /* Init member info. + */ + if (! 
partial) + st->member.access = st->member.accessDefault; +} + +static void initStatement (statementInfo *const st, statementInfo *const parent) +{ + st->parent = parent; + initMemberInfo (st); + reinitStatement (st, FALSE); +} + +/* +* Tag generation functions +*/ +static cKind cTagKind (const tagType type) +{ + cKind result = CK_UNDEFINED; + switch (type) + { + case TAG_CLASS: result = CK_CLASS; break; + case TAG_ENUM: result = CK_ENUMERATION; break; + case TAG_ENUMERATOR: result = CK_ENUMERATOR; break; + case TAG_FUNCTION: result = CK_FUNCTION; break; + case TAG_LOCAL: result = CK_LOCAL; break; + case TAG_MEMBER: result = CK_MEMBER; break; + case TAG_NAMESPACE: result = CK_NAMESPACE; break; + case TAG_PROTOTYPE: result = CK_PROTOTYPE; break; + case TAG_STRUCT: result = CK_STRUCT; break; + case TAG_TYPEDEF: result = CK_TYPEDEF; break; + case TAG_UNION: result = CK_UNION; break; + case TAG_VARIABLE: result = CK_VARIABLE; break; + case TAG_EXTERN_VAR: result = CK_EXTERN_VARIABLE; break; + + default: Assert ("Bad C tag type" == NULL); break; + } + return result; +} + +static csharpKind csharpTagKind (const tagType type) +{ + csharpKind result = CSK_UNDEFINED; + switch (type) + { + case TAG_CLASS: result = CSK_CLASS; break; + case TAG_ENUM: result = CSK_ENUMERATION; break; + case TAG_ENUMERATOR: result = CSK_ENUMERATOR; break; + case TAG_EVENT: result = CSK_EVENT; break; + case TAG_FIELD: result = CSK_FIELD ; break; + case TAG_INTERFACE: result = CSK_INTERFACE; break; + case TAG_LOCAL: result = CSK_LOCAL; break; + case TAG_METHOD: result = CSK_METHOD; break; + case TAG_NAMESPACE: result = CSK_NAMESPACE; break; + case TAG_PROPERTY: result = CSK_PROPERTY; break; + case TAG_STRUCT: result = CSK_STRUCT; break; + case TAG_TYPEDEF: result = CSK_TYPEDEF; break; + + default: Assert ("Bad C# tag type" == NULL); break; + } + return result; +} + +static javaKind javaTagKind (const tagType type) +{ + javaKind result = JK_UNDEFINED; + switch (type) + { + case TAG_CLASS: result = 
JK_CLASS; break; + case TAG_ENUM: result = JK_ENUM; break; + case TAG_ENUMERATOR: result = JK_ENUM_CONSTANT; break; + case TAG_FIELD: result = JK_FIELD; break; + case TAG_INTERFACE: result = JK_INTERFACE; break; + case TAG_LOCAL: result = JK_LOCAL; break; + case TAG_METHOD: result = JK_METHOD; break; + case TAG_PACKAGE: result = JK_PACKAGE; break; + + default: Assert ("Bad Java tag type" == NULL); break; + } + return result; +} + +static veraKind veraTagKind (const tagType type) { + veraKind result = VK_UNDEFINED; + switch (type) + { + case TAG_CLASS: result = VK_CLASS; break; + case TAG_ENUM: result = VK_ENUMERATION; break; + case TAG_ENUMERATOR: result = VK_ENUMERATOR; break; + case TAG_FUNCTION: result = VK_FUNCTION; break; + case TAG_LOCAL: result = VK_LOCAL; break; + case TAG_MEMBER: result = VK_MEMBER; break; + case TAG_PROGRAM: result = VK_PROGRAM; break; + case TAG_PROTOTYPE: result = VK_PROTOTYPE; break; + case TAG_TASK: result = VK_TASK; break; + case TAG_TYPEDEF: result = VK_TYPEDEF; break; + case TAG_VARIABLE: result = VK_VARIABLE; break; + case TAG_EXTERN_VAR: result = VK_EXTERN_VARIABLE; break; + + default: Assert ("Bad Vera tag type" == NULL); break; + } + return result; +} + +static const char *tagName (const tagType type) +{ + const char* result; + if (isLanguage (Lang_csharp)) + result = CsharpKinds [csharpTagKind (type)].name; + else if (isLanguage (Lang_java)) + result = JavaKinds [javaTagKind (type)].name; + else if (isLanguage (Lang_vera)) + result = VeraKinds [veraTagKind (type)].name; + else + result = CKinds [cTagKind (type)].name; + return result; +} + +static int tagLetter (const tagType type) +{ + int result; + if (isLanguage (Lang_csharp)) + result = CsharpKinds [csharpTagKind (type)].letter; + else if (isLanguage (Lang_java)) + result = JavaKinds [javaTagKind (type)].letter; + else if (isLanguage (Lang_vera)) + result = VeraKinds [veraTagKind (type)].letter; + else + result = CKinds [cTagKind (type)].letter; + return result; +} + 
+static boolean includeTag (const tagType type, const boolean isFileScope) +{ + boolean result; + if (isFileScope && ! Option.include.fileScope) + result = FALSE; + else if (isLanguage (Lang_csharp)) + result = CsharpKinds [csharpTagKind (type)].enabled; + else if (isLanguage (Lang_java)) + result = JavaKinds [javaTagKind (type)].enabled; + else if (isLanguage (Lang_vera)) + result = VeraKinds [veraTagKind (type)].enabled; + else + result = CKinds [cTagKind (type)].enabled; + return result; +} + +static tagType declToTagType (const declType declaration) +{ + tagType type = TAG_UNDEFINED; + + switch (declaration) + { + case DECL_CLASS: type = TAG_CLASS; break; + case DECL_ENUM: type = TAG_ENUM; break; + case DECL_EVENT: type = TAG_EVENT; break; + case DECL_FUNCTION: type = TAG_FUNCTION; break; + case DECL_INTERFACE: type = TAG_INTERFACE; break; + case DECL_NAMESPACE: type = TAG_NAMESPACE; break; + case DECL_PROGRAM: type = TAG_PROGRAM; break; + case DECL_TASK: type = TAG_TASK; break; + case DECL_STRUCT: type = TAG_STRUCT; break; + case DECL_UNION: type = TAG_UNION; break; + + default: Assert ("Unexpected declaration" == NULL); break; + } + return type; +} + +static const char* accessField (const statementInfo *const st) +{ + const char* result = NULL; + if (isLanguage (Lang_cpp) && st->scope == SCOPE_FRIEND) + result = "friend"; + else if (st->member.access != ACCESS_UNDEFINED) + result = accessString (st->member.access); + return result; +} + +static void addContextSeparator (vString *const scope) +{ + if (isLanguage (Lang_c) || isLanguage (Lang_cpp)) + vStringCatS (scope, "::"); + else if (isLanguage (Lang_java) || isLanguage (Lang_csharp)) + vStringCatS (scope, "."); +} + +static void addOtherFields (tagEntryInfo* const tag, const tagType type, + const statementInfo *const st, + vString *const scope, vString *const typeRef) +{ + /* For selected tag types, append an extension flag designating the + * parent object in which the tag is defined. 
+ */ + switch (type) + { + default: break; + + case TAG_FUNCTION: + case TAG_METHOD: + case TAG_PROTOTYPE: + if (vStringLength (Signature) > 0) + tag->extensionFields.signature = vStringValue (Signature); + case TAG_CLASS: + case TAG_ENUM: + case TAG_ENUMERATOR: + case TAG_EVENT: + case TAG_FIELD: + case TAG_INTERFACE: + case TAG_MEMBER: + case TAG_NAMESPACE: + case TAG_PROPERTY: + case TAG_STRUCT: + case TAG_TASK: + case TAG_TYPEDEF: + case TAG_UNION: + if (vStringLength (scope) > 0 && + (isMember (st) || st->parent->declaration == DECL_NAMESPACE)) + { + if (isType (st->context, TOKEN_NAME)) + tag->extensionFields.scope [0] = tagName (TAG_CLASS); + else + tag->extensionFields.scope [0] = + tagName (declToTagType (parentDecl (st))); + tag->extensionFields.scope [1] = vStringValue (scope); + } + if ((type == TAG_CLASS || type == TAG_INTERFACE || + type == TAG_STRUCT) && vStringLength (st->parentClasses) > 0) + { + + tag->extensionFields.inheritance = + vStringValue (st->parentClasses); + } + if (st->implementation != IMP_DEFAULT && + (isLanguage (Lang_cpp) || isLanguage (Lang_csharp) || + isLanguage (Lang_java))) + { + tag->extensionFields.implementation = + implementationString (st->implementation); + } + if (isMember (st)) + { + tag->extensionFields.access = accessField (st); + } + break; + } + + /* Add typename info, type of the tag and name of struct/union/etc. */ + if ((type == TAG_TYPEDEF || type == TAG_VARIABLE || type == TAG_MEMBER) + && isContextualStatement(st)) + { + char *p; + + tag->extensionFields.typeRef [0] = + tagName (declToTagType (st->declaration)); + p = vStringValue (st->blockName->name); + + /* If there was no {} block get the name from the token before the + * name (current token is ';' or ',', previous token is the name). + */ + if (p == NULL || *p == '\0') + { + tokenInfo *const prev2 = prevToken (st, 2); + if (isType (prev2, TOKEN_NAME)) + p = vStringValue (prev2->name); + } + + /* Prepend the scope name if there is one. 
*/ + if (vStringLength (scope) > 0) + { + vStringCopy(typeRef, scope); + addContextSeparator (typeRef); + vStringCatS(typeRef, p); + p = vStringValue (typeRef); + } + tag->extensionFields.typeRef [1] = p; + } +} + +static void findScopeHierarchy (vString *const string, + const statementInfo *const st) +{ + vStringClear (string); + if (isType (st->context, TOKEN_NAME)) + vStringCopy (string, st->context->name); + if (st->parent != NULL) + { + vString *temp = vStringNew (); + const statementInfo *s; + for (s = st->parent ; s != NULL ; s = s->parent) + { + if (isContextualStatement (s) || + s->declaration == DECL_NAMESPACE || + s->declaration == DECL_PROGRAM) + { + vStringCopy (temp, string); + vStringClear (string); + Assert (isType (s->blockName, TOKEN_NAME)); + if (isType (s->context, TOKEN_NAME) && + vStringLength (s->context->name) > 0) + { + vStringCat (string, s->context->name); + addContextSeparator (string); + } + vStringCat (string, s->blockName->name); + if (vStringLength (temp) > 0) + addContextSeparator (string); + vStringCat (string, temp); + } + } + vStringDelete (temp); + } +} + +static void makeExtraTagEntry (const tagType type, tagEntryInfo *const e, + vString *const scope) +{ + if (Option.include.qualifiedTags && + scope != NULL && vStringLength (scope) > 0) + { + vString *const scopedName = vStringNew (); + + if (type != TAG_ENUMERATOR) + vStringCopy (scopedName, scope); + else + { + /* remove last component (i.e. 
enumeration name) from scope */ + const char* const sc = vStringValue (scope); + const char* colon = strrchr (sc, ':'); + if (colon != NULL) + { + while (*colon == ':' && colon > sc) + --colon; + vStringNCopy (scopedName, scope, colon + 1 - sc); + } + } + if (vStringLength (scopedName) > 0) + { + addContextSeparator (scopedName); + vStringCatS (scopedName, e->name); + e->name = vStringValue (scopedName); + makeTagEntry (e); + } + vStringDelete (scopedName); + } +} + +static void makeTag (const tokenInfo *const token, + const statementInfo *const st, + boolean isFileScope, const tagType type) +{ + /* Nothing is really of file scope when it appears in a header file. + */ + isFileScope = (boolean) (isFileScope && ! isHeaderFile ()); + + if (isType (token, TOKEN_NAME) && vStringLength (token->name) > 0 && + includeTag (type, isFileScope)) + { + vString *scope = vStringNew (); + /* Use "typeRef" to store the typename from addOtherFields() until + * it's used in makeTagEntry(). + */ + vString *typeRef = vStringNew (); + tagEntryInfo e; + + initTagEntry (&e, vStringValue (token->name)); + + e.lineNumber = token->lineNumber; + e.filePosition = token->filePosition; + e.isFileScope = isFileScope; + e.kindName = tagName (type); + e.kind = tagLetter (type); + + findScopeHierarchy (scope, st); + addOtherFields (&e, type, st, scope, typeRef); + + makeTagEntry (&e); + makeExtraTagEntry (type, &e, scope); + vStringDelete (scope); + vStringDelete (typeRef); + } +} + +static boolean isValidTypeSpecifier (const declType declaration) +{ + boolean result; + switch (declaration) + { + case DECL_BASE: + case DECL_CLASS: + case DECL_ENUM: + case DECL_EVENT: + case DECL_STRUCT: + case DECL_UNION: + result = TRUE; + break; + + default: + result = FALSE; + break; + } + return result; +} + +static void qualifyEnumeratorTag (const statementInfo *const st, + const tokenInfo *const nameToken) +{ + if (isType (nameToken, TOKEN_NAME)) + makeTag (nameToken, st, TRUE, TAG_ENUMERATOR); +} + +static 
void qualifyFunctionTag (const statementInfo *const st, + const tokenInfo *const nameToken) +{ + if (isType (nameToken, TOKEN_NAME)) + { + tagType type; + const boolean isFileScope = /* private members, and static non-member statements */ + (boolean) (st->member.access == ACCESS_PRIVATE || + (!isMember (st) && st->scope == SCOPE_STATIC)); + if (isLanguage (Lang_java) || isLanguage (Lang_csharp)) + type = TAG_METHOD; + else if (isLanguage (Lang_vera) && st->declaration == DECL_TASK) + type = TAG_TASK; + else + type = TAG_FUNCTION; + makeTag (nameToken, st, isFileScope, type); + } +} +/* Tags a declaration as method (Java/C#), typedef, or prototype; the final C# test repeats an earlier branch. */ +static void qualifyFunctionDeclTag (const statementInfo *const st, + const tokenInfo *const nameToken) +{ + if (! isType (nameToken, TOKEN_NAME)) + ; + else if (isLanguage (Lang_java) || isLanguage (Lang_csharp)) + qualifyFunctionTag (st, nameToken); + else if (st->scope == SCOPE_TYPEDEF) + makeTag (nameToken, st, TRUE, TAG_TYPEDEF); + else if (isValidTypeSpecifier (st->declaration) && ! isLanguage (Lang_csharp)) + makeTag (nameToken, st, TRUE, TAG_PROTOTYPE); +} +/* Tags a compound type name via declToTagType(); file scope is requested except for Java, C# and Vera. */ +static void qualifyCompoundTag (const statementInfo *const st, + const tokenInfo *const nameToken) +{ + if (isType (nameToken, TOKEN_NAME)) + { + const tagType type = declToTagType (st->declaration); + const boolean fileScoped = (boolean) + (!(isLanguage (Lang_java) || + isLanguage (Lang_csharp) || + isLanguage (Lang_vera))); + + if (type != TAG_UNDEFINED) + makeTag (nameToken, st, fileScoped, type); + } +} +/* Forwards block-forming declarations to qualifyCompoundTag(); other declarations produce no tag. */ +static void qualifyBlockTag (statementInfo *const st, + const tokenInfo *const nameToken) +{ + switch (st->declaration) + { + case DECL_CLASS: + case DECL_ENUM: + case DECL_INTERFACE: + case DECL_NAMESPACE: + case DECL_PROGRAM: + case DECL_STRUCT: + case DECL_UNION: + qualifyCompoundTag (st, nameToken); + break; + default: break; + } +} +/* Tags a name as typedef, event, package, field/member, extern variable, local or variable. */ +static void qualifyVariableTag (const statementInfo *const st, + const tokenInfo *const nameToken) +{ + /* We have to watch that we do not interpret a declaration of the + * form "struct tag;" as a variable definition. 
In such a case, the + * token preceding the name will be a keyword. + */ + if (! isType (nameToken, TOKEN_NAME)) + ; + else if (st->scope == SCOPE_TYPEDEF) + makeTag (nameToken, st, TRUE, TAG_TYPEDEF); + else if (st->declaration == DECL_EVENT) + makeTag (nameToken, st, (boolean) (st->member.access == ACCESS_PRIVATE), + TAG_EVENT); + else if (st->declaration == DECL_PACKAGE) + makeTag (nameToken, st, FALSE, TAG_PACKAGE); + else if (isValidTypeSpecifier (st->declaration)) + { + if (st->notVariable) + ; + else if (isMember (st)) + { + if (isLanguage (Lang_java) || isLanguage (Lang_csharp)) + makeTag (nameToken, st, + (boolean) (st->member.access == ACCESS_PRIVATE), TAG_FIELD); + else if (st->scope == SCOPE_GLOBAL || st->scope == SCOPE_STATIC) + makeTag (nameToken, st, TRUE, TAG_MEMBER); + } + else + { + if (st->scope == SCOPE_EXTERN || ! st->haveQualifyingName) + makeTag (nameToken, st, FALSE, TAG_EXTERN_VAR); + else if (st->inFunction) + makeTag (nameToken, st, (boolean) (st->scope == SCOPE_STATIC), + TAG_LOCAL); + else + makeTag (nameToken, st, (boolean) (st->scope == SCOPE_STATIC), + TAG_VARIABLE); + } + } +} + +/* +* Parsing functions +*/ +/* Reads until EOF, a NUL, or a character contained in "chars"; returns the character that stopped the scan. */ +static int skipToOneOf (const char *const chars) +{ + int c; + do + c = cppGetc (); + while (c != EOF && c != '\0' && strchr (chars, c) == NULL); + return c; +} + +/* Skip to the next non-white character and return it; whitespace skipped + * while a signature is being collected is recorded in it as one space. */ +static int skipToNonWhite (void) +{ + boolean found = FALSE; + int c; + +#if 0 + do + c = cppGetc (); + while (isspace (c)); +#else + while (1) + { + c = cppGetc (); + if (isspace (c)) + found = TRUE; + else + break; + } + if (CollectingSignature && found) + vStringPut (Signature, ' '); +#endif + + return c; +} + +/* Skips to the next brace in column 1. This is intended for cases where + * preprocessor constructs result in unbalanced braces. 
+ */ +static void skipToFormattedBraceMatch (void) +{ + int c, next; + + c = cppGetc (); + next = cppGetc (); + while (c != EOF && (c != '\n' || next != '}')) /* stop at a newline directly followed by '}' */ + { + c = next; + next = cppGetc (); + } +} + +/* Skip to the matching character indicated by the pair string. If skipping + * to a matching brace and any brace is found within a different level of a + * #if conditional statement while brace formatting is in effect, we skip to + * the brace matched by its formatting. It is assumed that we have already + * read the character which starts the group (i.e. the first character of + * "pair"). + */ +static void skipToMatch (const char *const pair) +{ + const boolean braceMatching = (boolean) (strcmp ("{}", pair) == 0); + const boolean braceFormatting = (boolean) (isBraceFormat () && braceMatching); + const unsigned int initialLevel = getDirectiveNestLevel (); + const int begin = pair [0], end = pair [1]; + const unsigned long inputLineNumber = getInputLineNumber (); + int matchLevel = 1; + int c = '\0'; + + while (matchLevel > 0 && (c = skipToNonWhite ()) != EOF) + { + if (CollectingSignature) + vStringPut (Signature, c); + if (c == begin) + { + ++matchLevel; + if (braceFormatting && getDirectiveNestLevel () != initialLevel) + { + skipToFormattedBraceMatch (); + break; + } + } + else if (c == end) + { + --matchLevel; + if (braceFormatting && getDirectiveNestLevel () != initialLevel) + { + skipToFormattedBraceMatch (); + break; + } + } + } + if (c == EOF) /* input ended before the group was closed */ + { + verbose ("%s: failed to find match for '%c' at line %lu\n", + getInputFileName (), begin, inputLineNumber); + if (braceMatching) + longjmp (Exception, (int) ExceptionBraceFormattingError); + else + longjmp (Exception, (int) ExceptionFormattingError); + } +} +/* Skips a "(...)" group if one follows; otherwise pushes the character read back. */ +static void skipParens (void) +{ + const int c = skipToNonWhite (); + + if (c == '(') + skipToMatch ("()"); + else + cppUngetc (c); +} +/* Skips a "{...}" group if one follows; otherwise pushes the character read back. */ +static void skipBraces (void) +{ + const int c = skipToNonWhite (); + + if (c == '{') + skipToMatch ("{}"); + else 
+ cppUngetc (c); +} +/* Looks "name" up as a keyword of the current source language. */ +static keywordId analyzeKeyword (const char *const name) +{ + const keywordId id = (keywordId) lookupKeyword (name, getSourceLanguage ()); + return id; +} +/* Classifies "token" as keyword or name; an ignored token is reset and, when flagged, its "(...)" is skipped. */ +static void analyzeIdentifier (tokenInfo *const token) +{ + char *const name = vStringValue (token->name); + const char *replacement = NULL; + boolean parensToo = FALSE; + + if (isLanguage (Lang_java) || + ! isIgnoreToken (name, &parensToo, &replacement)) + { + if (replacement != NULL) + token->keyword = analyzeKeyword (replacement); + else + token->keyword = analyzeKeyword (vStringValue (token->name)); + + if (token->keyword == KEYWORD_NONE) + token->type = TOKEN_NAME; + else + token->type = TOKEN_KEYWORD; + } + else + { + initToken (token); + if (parensToo) + { + /* skipParens() pushes a non-'(' lookahead back instead of + * silently dropping it, as the open-coded check used to. + */ + skipParens (); + } + } +} +/* Reads an identifier beginning with "firstChar" into "token" and classifies it with analyzeIdentifier(). */ +static void readIdentifier (tokenInfo *const token, const int firstChar) +{ + vString *const name = token->name; + int c = firstChar; + boolean first = TRUE; + + initToken (token); + + /* Bug #1585745: strangely, C++ destructors allow whitespace between + * the ~ and the class name. 
*/ + if (isLanguage (Lang_cpp) && firstChar == '~') + { + vStringPut (name, c); + c = skipToNonWhite (); + } + + do + { + vStringPut (name, c); + if (CollectingSignature) + { + if (!first) /* the first character is not added to Signature here */ + vStringPut (Signature, c); + first = FALSE; + } + c = cppGetc (); + } while (isident (c) || ((isLanguage (Lang_java) || isLanguage (Lang_csharp)) && (isHighChar (c) || c == '.'))); + vStringTerminate (name); + cppUngetc (c); /* unget non-identifier character */ + + analyzeIdentifier (token); +} +/* Reads a dotted name beginning with "firstChar" into "token"; the terminating character is pushed back. */ +static void readPackageName (tokenInfo *const token, const int firstChar) +{ + vString *const name = token->name; + int c = firstChar; + + initToken (token); + + while (isident (c) || c == '.') + { + vStringPut (name, c); + c = cppGetc (); + } + vStringTerminate (name); + cppUngetc (c); /* unget non-package character */ +} +/* Records the declaration kind and, except for non-C# namespaces, reads the dotted name that follows. */ +static void readPackageOrNamespace (statementInfo *const st, const declType declaration) +{ + st->declaration = declaration; + + if (declaration == DECL_NAMESPACE && !isLanguage (Lang_csharp)) + { + /* In C++ a namespace is specified one level at a time. */ + return; + } + else + { + /* In C#, a namespace can also be specified like a Java package name. */ + tokenInfo *const token = activeToken (st); + Assert (isType (token, TOKEN_KEYWORD)); + readPackageName (token, skipToNonWhite ()); + token->type = TOKEN_NAME; + st->gotName = TRUE; + st->haveQualifyingName = TRUE; + } +} +/* Notes that a name was seen; a second name with no declaration so far implies a base declaration. */ +static void processName (statementInfo *const st) +{ + Assert (isType (activeToken (st), TOKEN_NAME)); + if (st->gotName && st->declaration == DECL_NONE) + st->declaration = DECL_BASE; + st->gotName = TRUE; + st->haveQualifyingName = TRUE; +} +/* Appends what follows the "operator" keyword to the token name and turns the token into a name. */ +static void readOperator (statementInfo *const st) +{ + const char *const acceptable = "+-*/%^&|~!=<>,[]"; + const tokenInfo* const prev = prevToken (st,1); + tokenInfo *const token = activeToken (st); + vString *const name = token->name; + int c = skipToNonWhite (); + + /* When we arrive here, we have the keyword "operator" in 'name'. 
+ */ + if (isType (prev, TOKEN_KEYWORD) && (prev->keyword == KEYWORD_ENUM || + prev->keyword == KEYWORD_STRUCT || prev->keyword == KEYWORD_UNION)) + ; /* ignore "operator" keyword if preceded by these keywords */ + else if (c == '(') + { + /* Verify whether this is a valid function call (i.e. "()") operator. + */ + if (cppGetc () == ')') + { + vStringPut (name, ' '); /* always separate operator from keyword */ + c = skipToNonWhite (); + if (c == '(') + vStringCatS (name, "()"); + } + else + { + skipToMatch ("()"); + c = cppGetc (); + } + } + else if (isident1 (c)) + { + /* Handle "new" and "delete" operators, and conversion functions + * (per 13.3.1.1.2 [2] of the C++ spec). + */ + boolean whiteSpace = TRUE; /* default causes insertion of space */ + do + { + if (isspace (c)) + whiteSpace = TRUE; + else + { + if (whiteSpace) + { + vStringPut (name, ' '); + whiteSpace = FALSE; + } + vStringPut (name, c); + } + c = cppGetc (); + } while (! isOneOf (c, "(;") && c != EOF); + vStringTerminate (name); + } + else if (isOneOf (c, acceptable)) + { + vStringPut (name, ' '); /* always separate operator from keyword */ + do + { + vStringPut (name, c); + c = cppGetc (); + } while (isOneOf (c, acceptable)); + vStringTerminate (name); + } + + cppUngetc (c); /* the character that ended the operator is left for the caller */ + + token->type = TOKEN_NAME; + token->keyword = KEYWORD_NONE; + processName (st); +} +/* Copies type, keyword, file position, line number and name from "src" to "dest". */ +static void copyToken (tokenInfo *const dest, const tokenInfo *const src) +{ + dest->type = src->type; + dest->keyword = src->keyword; + dest->filePosition = src->filePosition; + dest->lineNumber = src->lineNumber; + vStringCopy (dest->name, src->name); +} +/* Records a member's access level; in C++ it also becomes the default, and a following ':' resets the statement. */ +static void setAccess (statementInfo *const st, const accessType access) +{ + if (isMember (st)) + { + if (isLanguage (Lang_cpp)) + { + int c = skipToNonWhite (); + + if (c == ':') + reinitStatement (st, FALSE); + else + cppUngetc (c); + + st->member.accessDefault = access; + } + st->member.access = access; + } +} +/* Reads and discards identifiers separated by '.' or ','; the first other character is pushed back. */ +static void discardTypeList (tokenInfo *const token) +{ + int c = 
skipToNonWhite (); + while (isident1 (c)) + { + readIdentifier (token, c); + c = skipToNonWhite (); + if (c == '.' || c == ',') + c = skipToNonWhite (); + } + cppUngetc (c); +} +/* Appends the token's name to st->parentClasses, comma-separated. */ +static void addParentClass (statementInfo *const st, tokenInfo *const token) +{ + if (vStringLength (token->name) > 0 && + vStringLength (st->parentClasses) > 0) + { + vStringPut (st->parentClasses, ','); + } + vStringCat (st->parentClasses, token->name); +} +/* Collects parent names up to '{' or EOF; "qualifier" joins name components and "<...>" groups are skipped. */ +static void readParents (statementInfo *const st, const int qualifier) +{ + tokenInfo *const token = newToken (); + tokenInfo *const parent = newToken (); + int c; + + do + { + c = skipToNonWhite (); + if (isident1 (c)) + { + readIdentifier (token, c); + if (isType (token, TOKEN_NAME)) + vStringCat (parent->name, token->name); + else + { + addParentClass (st, parent); /* a non-name identifier closes the parent collected so far */ + initToken (parent); + } + } + else if (c == qualifier) + vStringPut (parent->name, c); + else if (c == '<') + skipToMatch ("<>"); + else if (isType (token, TOKEN_NAME)) + { + addParentClass (st, parent); + initToken (parent); + } + } while (c != '{' && c != EOF); + cppUngetc (c); + deleteToken (parent); + deleteToken (token); +} +/* Marks the statement as ignored and skips ahead with skipToOneOf (";"). */ +static void skipStatement (statementInfo *const st) +{ + st->declaration = DECL_IGNORE; + skipToOneOf (";"); +} +/* Marks the statement as an interface declaration. */ +static void processInterface (statementInfo *const st) +{ + st->declaration = DECL_INTERFACE; +} +/* Updates the statement according to the token's keyword; KEYWORD_NONE is handled as a name. */ +static void processToken (tokenInfo *const token, statementInfo *const st) +{ + switch (token->keyword) /* is it a reserved word? 
*/ + { + default: break; + /* keywords handled by a single statement */ + case KEYWORD_NONE: processName (st); break; + case KEYWORD_ABSTRACT: st->implementation = IMP_ABSTRACT; break; + case KEYWORD_ATTRIBUTE: skipParens (); initToken (token); break; + case KEYWORD_BIND: st->declaration = DECL_BASE; break; + case KEYWORD_BIT: st->declaration = DECL_BASE; break; + case KEYWORD_CATCH: skipParens (); skipBraces (); break; + case KEYWORD_CHAR: st->declaration = DECL_BASE; break; + case KEYWORD_CLASS: st->declaration = DECL_CLASS; break; + case KEYWORD_CONST: st->declaration = DECL_BASE; break; + case KEYWORD_DOUBLE: st->declaration = DECL_BASE; break; + case KEYWORD_ENUM: st->declaration = DECL_ENUM; break; + case KEYWORD_EXTENDS: readParents (st, '.'); + setToken (st, TOKEN_NONE); break; + case KEYWORD_FLOAT: st->declaration = DECL_BASE; break; + case KEYWORD_FUNCTION: st->declaration = DECL_BASE; break; + case KEYWORD_FRIEND: st->scope = SCOPE_FRIEND; break; + case KEYWORD_GOTO: skipStatement (st); break; + case KEYWORD_IMPLEMENTS:readParents (st, '.'); + setToken (st, TOKEN_NONE); break; + case KEYWORD_IMPORT: skipStatement (st); break; + case KEYWORD_INT: st->declaration = DECL_BASE; break; + case KEYWORD_INTEGER: st->declaration = DECL_BASE; break; + case KEYWORD_INTERFACE: processInterface (st); break; + case KEYWORD_LOCAL: setAccess (st, ACCESS_LOCAL); break; + case KEYWORD_LONG: st->declaration = DECL_BASE; break; + case KEYWORD_OPERATOR: readOperator (st); break; + case KEYWORD_PRIVATE: setAccess (st, ACCESS_PRIVATE); break; + case KEYWORD_PROGRAM: st->declaration = DECL_PROGRAM; break; + case KEYWORD_PROTECTED: setAccess (st, ACCESS_PROTECTED); break; + case KEYWORD_PUBLIC: setAccess (st, ACCESS_PUBLIC); break; + case KEYWORD_RETURN: skipStatement (st); break; + case KEYWORD_SHORT: st->declaration = DECL_BASE; break; + case KEYWORD_SIGNED: st->declaration = DECL_BASE; break; + case KEYWORD_STRING: st->declaration = DECL_BASE; break; + case KEYWORD_STRUCT: st->declaration = DECL_STRUCT; break; + case 
KEYWORD_TASK: st->declaration = DECL_TASK; break; + case KEYWORD_THROWS: discardTypeList (token); break; + case KEYWORD_UNION: st->declaration = DECL_UNION; break; + case KEYWORD_UNSIGNED: st->declaration = DECL_BASE; break; + case KEYWORD_USING: skipStatement (st); break; + case KEYWORD_VOID: st->declaration = DECL_BASE; break; + case KEYWORD_VOLATILE: st->declaration = DECL_BASE; break; + case KEYWORD_VIRTUAL: st->implementation = IMP_VIRTUAL; break; + case KEYWORD_WCHAR_T: st->declaration = DECL_BASE; break; + /* these may also read the (possibly dotted) name that follows */ + case KEYWORD_NAMESPACE: readPackageOrNamespace (st, DECL_NAMESPACE); break; + case KEYWORD_PACKAGE: readPackageOrNamespace (st, DECL_PACKAGE); break; + + case KEYWORD_EVENT: + if (isLanguage (Lang_csharp)) + st->declaration = DECL_EVENT; + break; + + case KEYWORD_TYPEDEF: + reinitStatement (st, FALSE); + st->scope = SCOPE_TYPEDEF; + break; + + case KEYWORD_EXTERN: + if (! isLanguage (Lang_csharp) || !st->gotName) + { + reinitStatement (st, FALSE); + st->scope = SCOPE_EXTERN; + st->declaration = DECL_BASE; + } + break; + + case KEYWORD_STATIC: + if (! 
(isLanguage (Lang_java) || isLanguage (Lang_csharp))) + { + reinitStatement (st, FALSE); + st->scope = SCOPE_STATIC; + st->declaration = DECL_BASE; + } + break; + + case KEYWORD_FOR: + case KEYWORD_FOREACH: + case KEYWORD_IF: + case KEYWORD_SWITCH: + case KEYWORD_WHILE: + { + /* Skip the parenthesized condition; skipParens() pushes the + * lookahead back when no '(' follows instead of dropping it. */ + skipParens (); + break; + } + } +} + +/* +* Parenthesis handling functions +*/ +/* Reinitializes the statement but keeps the active token, which is then processed again. */ +static void restartStatement (statementInfo *const st) +{ + tokenInfo *const save = newToken (); + tokenInfo *token = activeToken (st); + + copyToken (save, token); + DebugStatement ( if (debug (DEBUG_PARSE)) printf ("");) + reinitStatement (st, FALSE); + token = activeToken (st); + copyToken (token, save); + deleteToken (save); + processToken (token, st); +} + +/* Skips over the mem-initializer-list of a ctor-initializer, defined as: + * + * mem-initializer-list: + * mem-initializer, mem-initializer-list + * + * mem-initializer: + * [::] [nested-name-spec] class-name (...) + * identifier + */ +static void skipMemIntializerList (tokenInfo *const token) +{ + int c; + + do + { + c = skipToNonWhite (); + while (isident1 (c) || c == ':') + { + if (c != ':') + readIdentifier (token, c); + c = skipToNonWhite (); + } + if (c == '<') + { + skipToMatch ("<>"); + c = skipToNonWhite (); + } + if (c == '(') + { + skipToMatch ("()"); + c = skipToNonWhite (); + } + } while (c == ','); + cppUngetc (c); +} +/* Skips a "(...)" group; retardToken() is applied first when the token two back is a name. */ +static void skipMacro (statementInfo *const st) +{ + tokenInfo *const prev2 = prevToken (st, 2); + + if (isType (prev2, TOKEN_NAME)) + retardToken (st); + skipToMatch ("()"); +} + +/* Skips over characters following the parameter list. This will be either + * non-ANSI style function declarations or C++ stuff. Our choices: + * + * C (K&R): + * int func (); + * int func (one, two) int one; float two; {...} + * C (ANSI): + * int func (int one, float two); + * int func (int one, float two) {...} + * C++: + * int foo (...) [const|volatile] [throw (...)]; + * int foo (...) 
[const|volatile] [throw (...)] [ctor-initializer] {...} + * int foo (...) [const|volatile] [throw (...)] try [ctor-initializer] {...} + * catch (...) {...} + */ +static boolean skipPostArgumentStuff ( + statementInfo *const st, parenInfo *const info) +{ + tokenInfo *const token = activeToken (st); + unsigned int parameters = info->parameterCount; + unsigned int elementCount = 0; + boolean restart = FALSE; + boolean end = FALSE; + int c = skipToNonWhite (); + /* Read on until a character or keyword settles how the statement continues. */ + do + { + switch (c) + { + case ')': break; + case ':': skipMemIntializerList (token);break; /* ctor-initializer */ + case '[': skipToMatch ("[]"); break; + case '=': cppUngetc (c); end = TRUE; break; + case '{': cppUngetc (c); end = TRUE; break; + case '}': cppUngetc (c); end = TRUE; break; + + case '(': + if (elementCount > 0) + ++elementCount; + skipToMatch ("()"); + break; + /* NOTE(review): presumably each ';' counted here closes one K&R parameter declaration -- confirm. */ + case ';': + if (parameters == 0 || elementCount < 2) + { + cppUngetc (c); + end = TRUE; + } + else if (--parameters == 0) + end = TRUE; + break; + + default: + if (isident1 (c)) + { + readIdentifier (token, c); + switch (token->keyword) + { + case KEYWORD_ATTRIBUTE: skipParens (); break; + case KEYWORD_THROW: skipParens (); break; + case KEYWORD_TRY: break; + + case KEYWORD_CONST: + case KEYWORD_VOLATILE: + if (vStringLength (Signature) > 0) + { + vStringPut (Signature, ' '); + vStringCat (Signature, token->name); + } + break; + + case KEYWORD_CATCH: + case KEYWORD_CLASS: + case KEYWORD_EXPLICIT: + case KEYWORD_EXTERN: + case KEYWORD_FRIEND: + case KEYWORD_INLINE: + case KEYWORD_MUTABLE: + case KEYWORD_NAMESPACE: + case KEYWORD_NEW: + case KEYWORD_NEWCOV: + case KEYWORD_OPERATOR: + case KEYWORD_OVERLOAD: + case KEYWORD_PRIVATE: + case KEYWORD_PROTECTED: + case KEYWORD_PUBLIC: + case KEYWORD_STATIC: + case KEYWORD_TEMPLATE: + case KEYWORD_TYPEDEF: + case KEYWORD_TYPENAME: + case KEYWORD_USING: + case KEYWORD_VIRTUAL: + /* Never allowed within parameter declarations. 
*/ + restart = TRUE; + end = TRUE; + break; + + default: + if (isType (token, TOKEN_NONE)) + ; + else if (info->isKnrParamList && info->parameterCount > 0) + ++elementCount; + else + { + /* If we encounter any other identifier immediately + * following an empty parameter list, this is almost + * certainly one of those Microsoft macro "thingies" + * that the automatic source code generation sticks + * in. Terminate the current statement. + */ + restart = TRUE; + end = TRUE; + } + break; + } + } + } + if (! end) + { + c = skipToNonWhite (); + if (c == EOF) + end = TRUE; + } + } while (! end); + /* On restart, the token just read is processed again as the start of a new statement. */ + if (restart) + restartStatement (st); + else + setToken (st, TOKEN_NONE); + + return (boolean) (c != EOF); +} +/* Skips a Java "throws" clause. NOTE(review): otherwise only the first character of the identifier read is pushed back. */ +static void skipJavaThrows (statementInfo *const st) +{ + tokenInfo *const token = activeToken (st); + int c = skipToNonWhite (); + + if (isident1 (c)) + { + readIdentifier (token, c); + if (token->keyword == KEYWORD_THROWS) + { + do + { + c = skipToNonWhite (); + if (isident1 (c)) + { + readIdentifier (token, c); + c = skipToNonWhite (); + } + } while (c == '.' || c == ','); + } + } + cppUngetc (c); + setToken (st, TOKEN_NONE); +} +/* Peeks past a parameter list and, unless one of "{;,=" follows, skips the clauses that trail it. */ +static void analyzePostParens (statementInfo *const st, parenInfo *const info) +{ + const unsigned long inputLineNumber = getInputLineNumber (); + int c = skipToNonWhite (); + + cppUngetc (c); + if (isOneOf (c, "{;,=")) + ; + else if (isLanguage (Lang_java)) + skipJavaThrows (st); + else + { + if (! 
skipPostArgumentStuff (st, info)) + { + verbose ( + "%s: confusing argument declarations beginning at line %lu\n", + getInputFileName (), inputLineNumber); + longjmp (Exception, (int) ExceptionFormattingError); + } + } +} +/* TRUE for C++, C# and Java. */ +static boolean languageSupportsGenerics (void) +{ + return (boolean) (isLanguage (Lang_cpp) || isLanguage (Lang_csharp) || + isLanguage (Lang_java)); +} +/* After '<': skips a "<...>" list in languages with generics, or consumes the remainder of "<<" and "<<=". */ +static void processAngleBracket (void) +{ + int c = cppGetc (); + if (c == '>') { + /* already found match for template */ + } else if (languageSupportsGenerics () && c != '<' && c != '=') { + /* this is a template */ + cppUngetc (c); + skipToMatch ("<>"); + } else if (c == '<') { + /* skip "<<" or "<<=". */ + c = cppGetc (); + if (c != '=') { + cppUngetc (c); + } + } else { + cppUngetc (c); + } +} +/* Reads the identifier after '@'; "@interface" declares an annotation type, otherwise optional arguments are skipped. */ +static void parseJavaAnnotation (statementInfo *const st) +{ + /* + * @Override + * @Target(ElementType.METHOD) + * @SuppressWarnings(value = "unchecked") + * + * But watch out for "@interface"! + */ + tokenInfo *const token = activeToken (st); + + int c = skipToNonWhite (); + readIdentifier (token, c); + if (token->keyword == KEYWORD_INTERFACE) + { + /* Oops. This was actually "@interface" defining a new annotation. */ + processInterface (st); + } + else + { + /* Bug #1691412: skip any annotation arguments. 
*/ + skipParens (); + } +} +/* Scans a parenthesized group into Signature and "info". NOTE(review): nextChar is never changed, so '\0' is returned. */ +static int parseParens (statementInfo *const st, parenInfo *const info) +{ + tokenInfo *const token = activeToken (st); + unsigned int identifierCount = 0; + unsigned int depth = 1; + boolean firstChar = TRUE; + int nextChar = '\0'; + + CollectingSignature = TRUE; + vStringClear (Signature); + vStringPut (Signature, '('); + info->parameterCount = 1; + do + { + int c = skipToNonWhite (); + vStringPut (Signature, c); + + switch (c) + { + case '&': + case '*': + info->isPointer = TRUE; + info->isKnrParamList = FALSE; + if (identifierCount == 0) + info->isParamList = FALSE; + initToken (token); + break; + + case ':': + info->isKnrParamList = FALSE; + break; + + case '.': + info->isNameCandidate = FALSE; + c = cppGetc (); + if (c != '.') + { + cppUngetc (c); + info->isKnrParamList = FALSE; + } + else + { + c = cppGetc (); + if (c != '.') + { + cppUngetc (c); + info->isKnrParamList = FALSE; + } + else + vStringCatS (Signature, "..."); /* variable arg list */ + } + break; + + case ',': + info->isNameCandidate = FALSE; + if (info->isKnrParamList) + { + ++info->parameterCount; + identifierCount = 0; + } + break; + + case '=': + info->isKnrParamList = FALSE; + info->isNameCandidate = FALSE; + if (firstChar) + { + info->isParamList = FALSE; + skipMacro (st); + depth = 0; + } + break; + + case '[': + info->isKnrParamList = FALSE; + skipToMatch ("[]"); + break; + + case '<': + info->isKnrParamList = FALSE; + processAngleBracket (); + break; + + case ')': + if (firstChar) /* "()" -- no parameters at all */ + info->parameterCount = 0; + --depth; + break; + + case '(': + info->isKnrParamList = FALSE; + if (firstChar) + { + info->isNameCandidate = FALSE; + cppUngetc (c); + vStringClear (Signature); + skipMacro (st); + depth = 0; + vStringChop (Signature); + } + else if (isType (token, TOKEN_PAREN_NAME)) + { + c = skipToNonWhite (); + if (c == '*') /* check for function pointer */ + { + skipToMatch ("()"); + c = skipToNonWhite (); + if (c == '(') + skipToMatch ("()"); + else + cppUngetc 
(c); + } + else + { + cppUngetc (c); + cppUngetc ('('); + info->nestedArgs = TRUE; + } + } + else + ++depth; + break; + + default: + if (c == '@' && isLanguage (Lang_java)) + { + parseJavaAnnotation(st); + } + else if (isident1 (c)) + { + if (++identifierCount > 1) + info->isKnrParamList = FALSE; + readIdentifier (token, c); + if (isType (token, TOKEN_NAME) && info->isNameCandidate) + token->type = TOKEN_PAREN_NAME; + else if (isType (token, TOKEN_KEYWORD)) + { + if (token->keyword != KEYWORD_CONST && + token->keyword != KEYWORD_VOLATILE) + { + info->isKnrParamList = FALSE; + info->isNameCandidate = FALSE; + } + } + } + else + { + info->isParamList = FALSE; + info->isKnrParamList = FALSE; + info->isNameCandidate = FALSE; + info->invalidContents = TRUE; + } + break; + } + firstChar = FALSE; + } while (! info->nestedArgs && depth > 0 && + (info->isKnrParamList || info->isNameCandidate)); + /* Whatever is left of the group is skipped without further analysis. */ + if (! info->nestedArgs) while (depth > 0) + { + skipToMatch ("()"); + --depth; + } + + if (! info->isNameCandidate) + initToken (token); + + vStringTerminate (Signature); + if (info->isKnrParamList) + vStringClear (Signature); + CollectingSignature = FALSE; + return nextChar; +} +/* Resets "info"; a K&R parameter list is only considered when the language is C. */ +static void initParenInfo (parenInfo *const info) +{ + info->isPointer = FALSE; + info->isParamList = TRUE; + info->isKnrParamList = isLanguage (Lang_c); + info->isNameCandidate = TRUE; + info->invalidContents = FALSE; + info->nestedArgs = FALSE; + info->parameterCount = 0; +} +/* Classifies a parenthesized group as a parenthesized name, an argument list, or neither. */ +static void analyzeParens (statementInfo *const st) +{ + tokenInfo *const prev = prevToken (st, 1); + + if (st->inFunction && ! st->assignment) + st->notVariable = TRUE; + if (! 
isType (prev, TOKEN_NONE)) /* in case of ignored enclosing macros */ + { + tokenInfo *const token = activeToken (st); + parenInfo info; + int c; + + initParenInfo (&info); + parseParens (st, &info); + c = skipToNonWhite (); + cppUngetc (c); /* c is only peeked at to classify the group */ + if (info.invalidContents) + reinitStatement (st, FALSE); + else if (info.isNameCandidate && isType (token, TOKEN_PAREN_NAME) && + ! st->gotParenName && + (! info.isParamList || ! st->haveQualifyingName || + c == '(' || + (c == '=' && st->implementation != IMP_VIRTUAL) || + (st->declaration == DECL_NONE && isOneOf (c, ",;")))) + { + token->type = TOKEN_NAME; + processName (st); + st->gotParenName = TRUE; + if (! (c == '(' && info.nestedArgs)) + st->isPointer = info.isPointer; + } + else if (! st->gotArgs && info.isParamList) + { + st->gotArgs = TRUE; + setToken (st, TOKEN_ARGS); + advanceToken (st); + if (st->scope != SCOPE_TYPEDEF) + analyzePostParens (st, &info); + } + else + setToken (st, TOKEN_NONE); + } +} + +/* +* Token parsing functions +*/ +/* Appends the token's name to the statement context, separated by "::" (C, C++) or "." (Java, C#). */ +static void addContext (statementInfo *const st, const tokenInfo* const token) +{ + if (isType (token, TOKEN_NAME)) + { + if (vStringLength (st->context->name) > 0) + { + if (isLanguage (Lang_c) || isLanguage (Lang_cpp)) + vStringCatS (st->context->name, "::"); + else if (isLanguage (Lang_java) || isLanguage (Lang_csharp)) + vStringCatS (st->context->name, "."); + } + vStringCat (st->context->name, token->name); + st->context->type = TOKEN_NAME; + } +} +/* TRUE if "decl" may be followed by a parent list: class, struct, interface, or a C# enum. */ +static boolean inheritingDeclaration (declType decl) +{ + /* C# supports inheritance for enums. C++0x will too, but not yet. */ + if (decl == DECL_ENUM) + { + return (boolean) (isLanguage (Lang_csharp)); + } + return (boolean) ( + decl == DECL_CLASS || + decl == DECL_STRUCT || + decl == DECL_INTERFACE); +} +/* Handles ':' as scope operator, parent-list introducer, struct member suffix, or a colon that restarts the statement. */ +static void processColon (statementInfo *const st) +{ + int c = (isLanguage (Lang_cpp) ? 
cppGetc () : skipToNonWhite ()); + const boolean doubleColon = (boolean) (c == ':'); + + if (doubleColon) + { + setToken (st, TOKEN_DOUBLE_COLON); + st->haveQualifyingName = FALSE; + } + else + { + cppUngetc (c); + if ((isLanguage (Lang_cpp) || isLanguage (Lang_csharp)) && + inheritingDeclaration (st->declaration)) + { + readParents (st, ':'); + } + else if (parentDecl (st) == DECL_STRUCT) /* presumably a bit-field width -- TODO confirm */ + { + c = skipToOneOf (",;"); + if (c == ',') + setToken (st, TOKEN_COMMA); + else if (c == ';') + setToken (st, TOKEN_SEMICOLON); + } + else + { + const tokenInfo *const prev = prevToken (st, 1); + const tokenInfo *const prev2 = prevToken (st, 2); + if (prev->keyword == KEYWORD_DEFAULT || + prev2->keyword == KEYWORD_CASE || + st->parent != NULL) + { + reinitStatement (st, FALSE); + } + } + } +} + +/* Skips over any initializing value which may follow an '=' character in a + * variable definition. + */ +static int skipInitializer (statementInfo *const st) +{ + boolean done = FALSE; + int c; + + while (! done) + { + c = skipToNonWhite (); + + if (c == EOF) + longjmp (Exception, (int) ExceptionFormattingError); + else switch (c) + { + case ',': + case ';': done = TRUE; break; + + case '0': /* a '0' in the initializer of a virtual function marks it pure virtual */ + if (st->implementation == IMP_VIRTUAL) + st->implementation = IMP_PURE_VIRTUAL; + break; + + case '[': skipToMatch ("[]"); break; + case '(': skipToMatch ("()"); break; + case '{': skipToMatch ("{}"); break; + case '<': processAngleBracket(); break; + + case '}': + if (insideEnumBody (st)) + done = TRUE; + else if (! 
isBraceFormat ()) + { + verbose ("%s: unexpected closing brace at line %lu\n", + getInputFileName (), getInputLineNumber ()); + longjmp (Exception, (int) ExceptionBraceFormattingError); + } + break; + + default: break; + } + } + return c; +} +/* Handles '=': unless another '=' follows, skips the initializer and records the token that ended it. */ +static void processInitializer (statementInfo *const st) +{ + const boolean inEnumBody = insideEnumBody (st); + int c = cppGetc (); + + if (c != '=') + { + cppUngetc (c); + c = skipInitializer (st); + st->assignment = TRUE; + if (c == ';') + setToken (st, TOKEN_SEMICOLON); + else if (c == ',') + setToken (st, TOKEN_COMMA); + else if (c == '}' && inEnumBody) + { + cppUngetc (c); + setToken (st, TOKEN_COMMA); + } + if (st->scope == SCOPE_EXTERN) + st->scope = SCOPE_GLOBAL; + } +} +/* Reads an identifier and processes it unless the token ended up as TOKEN_NONE. */ +static void parseIdentifier (statementInfo *const st, const int c) +{ + tokenInfo *const token = activeToken (st); + + readIdentifier (token, c); + if (! isType (token, TOKEN_NONE)) + processToken (token, st); +} +/* Handles identifiers and the few operator characters that need lookahead ("->", "!=", ">="), '@' and extern "C". */ +static void parseGeneralToken (statementInfo *const st, const int c) +{ + const tokenInfo *const prev = prevToken (st, 1); + + if (isident1 (c) || (isLanguage (Lang_java) && isHighChar (c))) + { + parseIdentifier (st, c); + if (isType (st->context, TOKEN_NAME) && + isType (activeToken (st), TOKEN_NAME) && isType (prev, TOKEN_NAME)) + { + initToken (st->context); + } + } + else if (c == '.' || c == '-') + { + if (! st->assignment) + st->notVariable = TRUE; + if (c == '-') + { + int c2 = cppGetc (); + if (c2 != '>') + cppUngetc (c2); + } + } + else if (c == '!' || c == '>') + { + int c2 = cppGetc (); + if (c2 != '=') + cppUngetc (c2); + } + else if (c == '@' && isLanguage (Lang_java)) + { + parseJavaAnnotation (st); + } + else if (isExternCDecl (st, c)) + { + st->declaration = DECL_NOMANGLE; + st->scope = SCOPE_GLOBAL; + } +} + +/* Reads characters from the pre-processor and assembles tokens, setting + * the current statement state. 
+ */ +static void nextToken (statementInfo *const st) +{ + tokenInfo *token; + do + { + int c = skipToNonWhite (); + switch (c) + { + case EOF: longjmp (Exception, (int) ExceptionEOF); break; + case '(': analyzeParens (st); break; + case '<': processAngleBracket (); break; + case '*': st->haveQualifyingName = FALSE; break; + case ',': setToken (st, TOKEN_COMMA); break; + case ':': processColon (st); break; + case ';': setToken (st, TOKEN_SEMICOLON); break; + case '=': processInitializer (st); break; + case '[': skipToMatch ("[]"); break; + case '{': setToken (st, TOKEN_BRACE_OPEN); break; + case '}': setToken (st, TOKEN_BRACE_CLOSE); break; + default: parseGeneralToken (st, c); break; + } + token = activeToken (st); + } while (isType (token, TOKEN_NONE)); /* repeat until a handler has produced a token */ +} + +/* +* Scanning support functions +*/ +/* Innermost statement; set by newStatement(), restored to the parent by deleteStatement(). */ +static statementInfo *CurrentStatement = NULL; +/* Allocates a statement with fresh tokens, initializes it with "parent" and makes it current. */ +static statementInfo *newStatement (statementInfo *const parent) +{ + statementInfo *const st = xMalloc (1, statementInfo); + unsigned int i; + + for (i = 0 ; i < (unsigned int) NumTokens ; ++i) + st->token [i] = newToken (); + + st->context = newToken (); + st->blockName = newToken (); + st->parentClasses = vStringNew (); + + initStatement (st, parent); + CurrentStatement = st; + + return st; +} +/* Frees the current statement and makes its parent the current one. */ +static void deleteStatement (void) +{ + statementInfo *const st = CurrentStatement; + statementInfo *const parent = st->parent; + unsigned int i; + + for (i = 0 ; i < (unsigned int) NumTokens ; ++i) + { + deleteToken (st->token [i]); st->token [i] = NULL; + } + deleteToken (st->blockName); st->blockName = NULL; + deleteToken (st->context); st->context = NULL; + vStringDelete (st->parentClasses); st->parentClasses = NULL; + eFree (st); + CurrentStatement = parent; +} +/* Frees every statement from the current one up through its ancestors. */ +static void deleteAllStatements (void) +{ + while (CurrentStatement != NULL) + deleteStatement (); +} +/* TRUE if the active token ends the statement. */ +static boolean isStatementEnd (const statementInfo *const st) +{ + const tokenInfo *const token = activeToken (st); + boolean isEnd; + + if (isType (token, 
TOKEN_SEMICOLON)) + isEnd = TRUE; + else if (isType (token, TOKEN_BRACE_CLOSE)) + /* Java and C# do not require semicolons to end a block. Neither do C++ + * namespaces. All other blocks require a semicolon to terminate them. + */ + isEnd = (boolean) (isLanguage (Lang_java) || isLanguage (Lang_csharp) || + ! isContextualStatement (st)); + else + isEnd = FALSE; + + return isEnd; +} + +static void checkStatementEnd (statementInfo *const st) +{ + const tokenInfo *const token = activeToken (st); + + if (isType (token, TOKEN_COMMA)) + reinitStatement (st, TRUE); + else if (isStatementEnd (st)) + { + DebugStatement ( if (debug (DEBUG_PARSE)) printf (""); ) + reinitStatement (st, FALSE); + cppEndStatement (); + } + else + { + cppBeginStatement (); + advanceToken (st); + } +} + +static void nest (statementInfo *const st, const unsigned int nestLevel) +{ + switch (st->declaration) + { + case DECL_CLASS: + case DECL_ENUM: + case DECL_INTERFACE: + case DECL_NAMESPACE: + case DECL_NOMANGLE: + case DECL_STRUCT: + case DECL_UNION: + createTags (nestLevel, st); + break; + + case DECL_FUNCTION: + case DECL_TASK: + st->inFunction = TRUE; + /* fall through */ + default: + if (includeTag (TAG_LOCAL, FALSE)) + createTags (nestLevel, st); + else + skipToMatch ("{}"); + break; + } + advanceToken (st); + setToken (st, TOKEN_BRACE_CLOSE); +} + +static void tagCheck (statementInfo *const st) +{ + const tokenInfo *const token = activeToken (st); + const tokenInfo *const prev = prevToken (st, 1); + const tokenInfo *const prev2 = prevToken (st, 2); + + switch (token->type) + { + case TOKEN_NAME: + if (insideEnumBody (st)) + qualifyEnumeratorTag (st, token); + break; +#if 0 + case TOKEN_PACKAGE: + if (st->haveQualifyingName) + makeTag (token, st, FALSE, TAG_PACKAGE); + break; +#endif + case TOKEN_BRACE_OPEN: + if (isType (prev, TOKEN_ARGS)) + { + if (st->haveQualifyingName) + { + if (! 
isLanguage (Lang_vera)) + st->declaration = DECL_FUNCTION; + if (isType (prev2, TOKEN_NAME)) + copyToken (st->blockName, prev2); + qualifyFunctionTag (st, prev2); + } + } + else if (isContextualStatement (st) || + st->declaration == DECL_NAMESPACE || + st->declaration == DECL_PROGRAM) + { + if (isType (prev, TOKEN_NAME)) + copyToken (st->blockName, prev); + else + { + /* For an anonymous struct or union we use a unique ID + * a number, so that the members can be found. + */ + char buf [20]; /* length of "_anon" + digits + null */ + sprintf (buf, "__anon%d", ++AnonymousID); + vStringCopyS (st->blockName->name, buf); + st->blockName->type = TOKEN_NAME; + st->blockName->keyword = KEYWORD_NONE; + } + qualifyBlockTag (st, prev); + } + else if (isLanguage (Lang_csharp)) + makeTag (prev, st, FALSE, TAG_PROPERTY); + break; + + case TOKEN_SEMICOLON: + case TOKEN_COMMA: + if (insideEnumBody (st)) + ; + else if (isType (prev, TOKEN_NAME)) + { + if (isContextualKeyword (prev2)) + makeTag (prev, st, TRUE, TAG_EXTERN_VAR); + else + qualifyVariableTag (st, prev); + } + else if (isType (prev, TOKEN_ARGS) && isType (prev2, TOKEN_NAME)) + { + if (st->isPointer) + qualifyVariableTag (st, prev2); + else + qualifyFunctionDeclTag (st, prev2); + } + if (isLanguage (Lang_java) && token->type == TOKEN_SEMICOLON && insideEnumBody (st)) + { + /* In Java, after an initial enum-like part, + * a semicolon introduces a class-like part. + * See Bug #1730485 for the full rationale. */ + st->parent->declaration = DECL_CLASS; + } + break; + + default: break; + } +} + +/* Parses the current file and decides whether to write out and tags that + * are discovered. 
+ */ +static void createTags (const unsigned int nestLevel, + statementInfo *const parent) +{ + statementInfo *const st = newStatement (parent); + + DebugStatement ( if (nestLevel > 0) debugParseNest (TRUE, nestLevel); ) + while (TRUE) + { + tokenInfo *token; + + nextToken (st); + token = activeToken (st); + if (isType (token, TOKEN_BRACE_CLOSE)) + { + if (nestLevel > 0) + break; + else + { + verbose ("%s: unexpected closing brace at line %lu\n", + getInputFileName (), getInputLineNumber ()); + longjmp (Exception, (int) ExceptionBraceFormattingError); + } + } + else if (isType (token, TOKEN_DOUBLE_COLON)) + { + addContext (st, prevToken (st, 1)); + advanceToken (st); + } + else + { + tagCheck (st); + if (isType (token, TOKEN_BRACE_OPEN)) + nest (st, nestLevel + 1); + checkStatementEnd (st); + } + } + deleteStatement (); + DebugStatement ( if (nestLevel > 0) debugParseNest (FALSE, nestLevel - 1); ) +} + +static boolean findCTags (const unsigned int passCount) +{ + exception_t exception; + boolean retry; + + Assert (passCount < 3); + cppInit ((boolean) (passCount > 1), isLanguage (Lang_csharp)); + Signature = vStringNew (); + + exception = (exception_t) setjmp (Exception); + retry = FALSE; + if (exception == ExceptionNone) + createTags (0, NULL); + else + { + deleteAllStatements (); + if (exception == ExceptionBraceFormattingError && passCount == 1) + { + retry = TRUE; + verbose ("%s: retrying file with fallback brace matching algorithm\n", + getInputFileName ()); + } + } + vStringDelete (Signature); + cppTerminate (); + return retry; +} + +static void buildKeywordHash (const langType language, unsigned int idx) +{ + const size_t count = sizeof (KeywordTable) / sizeof (KeywordTable [0]); + size_t i; + for (i = 0 ; i < count ; ++i) + { + const keywordDesc* const p = &KeywordTable [i]; + if (p->isValid [idx]) + addKeyword (p->name, language, (int) p->id); + } +} + +static void initializeCParser (const langType language) +{ + Lang_c = language; + buildKeywordHash 
(language, 0); +} + +static void initializeCppParser (const langType language) +{ + Lang_cpp = language; + buildKeywordHash (language, 1); +} + +static void initializeCsharpParser (const langType language) +{ + Lang_csharp = language; + buildKeywordHash (language, 2); +} + +static void initializeJavaParser (const langType language) +{ + Lang_java = language; + buildKeywordHash (language, 3); +} + +static void initializeVeraParser (const langType language) +{ + Lang_vera = language; + buildKeywordHash (language, 4); +} + +extern parserDefinition* CParser (void) +{ + static const char *const extensions [] = { "c", NULL }; + parserDefinition* def = parserNew ("C"); + def->kinds = CKinds; + def->kindCount = KIND_COUNT (CKinds); + def->extensions = extensions; + def->parser2 = findCTags; + def->initialize = initializeCParser; + return def; +} + +extern parserDefinition* CppParser (void) +{ + static const char *const extensions [] = { + "c++", "cc", "cp", "cpp", "cxx", "h", "h++", "hh", "hp", "hpp", "hxx", +#ifndef CASE_INSENSITIVE_FILENAMES + "C", "H", +#endif + NULL + }; + parserDefinition* def = parserNew ("C++"); + def->kinds = CKinds; + def->kindCount = KIND_COUNT (CKinds); + def->extensions = extensions; + def->parser2 = findCTags; + def->initialize = initializeCppParser; + return def; +} + +extern parserDefinition* CsharpParser (void) +{ + static const char *const extensions [] = { "cs", NULL }; + parserDefinition* def = parserNew ("C#"); + def->kinds = CsharpKinds; + def->kindCount = KIND_COUNT (CsharpKinds); + def->extensions = extensions; + def->parser2 = findCTags; + def->initialize = initializeCsharpParser; + return def; +} + +extern parserDefinition* JavaParser (void) +{ + static const char *const extensions [] = { "java", NULL }; + parserDefinition* def = parserNew ("Java"); + def->kinds = JavaKinds; + def->kindCount = KIND_COUNT (JavaKinds); + def->extensions = extensions; + def->parser2 = findCTags; + def->initialize = initializeJavaParser; + return def; 
+} + +extern parserDefinition* VeraParser (void) +{ + static const char *const extensions [] = { "vr", "vri", "vrh", NULL }; + parserDefinition* def = parserNew ("Vera"); + def->kinds = VeraKinds; + def->kindCount = KIND_COUNT (VeraKinds); + def->extensions = extensions; + def->parser2 = findCTags; + def->initialize = initializeVeraParser; + return def; +} + +/* vi:set tabstop=4 shiftwidth=4 noexpandtab: */ diff --git a/third_party/ctags/cobol.c b/third_party/ctags/cobol.c new file mode 100644 index 000000000..e49f43d65 --- /dev/null +++ b/third_party/ctags/cobol.c @@ -0,0 +1,51 @@ +// clang-format off +/* +* $Id: cobol.c 443 2006-05-30 04:37:13Z darren $ +* +* Copyright (c) 2000-2003, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains functions for generating tags for COBOL language +* files. +*/ + +/* +* INCLUDE FILES +*/ +#include "third_party/ctags/general.h" /* must always come first */ +#include "third_party/ctags/parse.h" + +/* +* FUNCTION DEFINITIONS +*/ + +static void installCobolRegex (const langType language) +{ /* one addTagRegex call per tag kind; every rule names the tag after capture group 1 and passes the flag string "i" */ + addTagRegex (language, "^[ \t]*[0-9]+[ \t]+([A-Z0-9][A-Z0-9-]*)[ \t]+(BLANK|OCCURS|IS|JUST|PIC|REDEFINES|RENAMES|SIGN|SYNC|USAGE|VALUE)", + "\\1", "d,data,data items", "i"); + addTagRegex (language, "^[ \t]*[FSR]D[ \t]+([A-Z0-9][A-Z0-9-]*)\\.", + "\\1", "f,file,file descriptions (FD, SD, RD)", "i"); + addTagRegex (language, "^[ \t]*[0-9]+[ \t]+([A-Z0-9][A-Z0-9-]*)\\.", + "\\1", "g,group,group items", "i"); + addTagRegex (language, "^[ \t]*([A-Z0-9][A-Z0-9-]*)\\.", + "\\1", "p,paragraph,paragraphs", "i"); + addTagRegex (language, "^[ \t]*PROGRAM-ID\\.[ \t]+([A-Z0-9][A-Z0-9-]*)\\.", + "\\1", "P,program,program ids", "i"); + addTagRegex (language, "^[ \t]*([A-Z0-9][A-Z0-9-]*)[ \t]+SECTION\\.", + "\\1", "s,section,sections", "i"); +} + +extern parserDefinition* CobolParser (void) /* prototype form, matching the other parser constructors */ +{ + static const char *const extensions [] = { + "cbl", "cob", "CBL",
"COB", NULL }; + parserDefinition* def = parserNew ("Cobol"); + def->extensions = extensions; + def->initialize = installCobolRegex; + def->regex = TRUE; + return def; +} + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/third_party/ctags/config.h b/third_party/ctags/config.h new file mode 100644 index 000000000..a1a7309d8 --- /dev/null +++ b/third_party/ctags/config.h @@ -0,0 +1,279 @@ +// clang-format off +/* config.h. Generated from config.h.in by configure. */ +/* config.h.in. Generated from configure.ac by autoheader. */ + +/* Define this label if your system uses case-insensitive file names */ +/* #undef CASE_INSENSITIVE_FILENAMES */ + +/* Define this label if you wish to check the regcomp() function at run time + for correct behavior. This function is currently broken on Cygwin. */ +/* #undef CHECK_REGCOMP */ + +/* You can define this label to be a string containing the name of a + site-specific configuration file containing site-wide default options. The + files /etc/ctags.conf and /usr/local/etc/ctags.conf are already checked, so + only define one here if you need a file somewhere else. */ +/* #undef CUSTOM_CONFIGURATION_FILE */ + + +/* Define this as desired. + * 1: Original ctags format + * 2: Extended ctags format with extension flags in EX-style comment. + */ +#define DEFAULT_FILE_FORMAT 2 + + + +/* Define this label to use the system sort utility (which is probably more +* efficient) over the internal sorting algorithm. +*/ +#ifndef INTERNAL_SORT +# define EXTERNAL_SORT 1 +#endif + + +/* Define to 1 if you have the `chsize' function. */ +/* #undef HAVE_CHSIZE */ + +/* Define to 1 if you have the `clock' function. */ +#define HAVE_CLOCK 1 + +/* Define to 1 if you have the <dirent.h> header file. */ +#define HAVE_DIRENT_H 1 + +/* Define to 1 if you have the <fcntl.h> header file. */ +#define HAVE_FCNTL_H 1 + +/* Define to 1 if you have the `fgetpos' function. */ +#define HAVE_FGETPOS 1 + +/* Define to 1 if you have the `findfirst' function. 
*/ +/* #undef HAVE_FINDFIRST */ + +/* Define to 1 if you have the `fnmatch' function. */ +#define HAVE_FNMATCH 1 + +/* Define to 1 if you have the <fnmatch.h> header file. */ +#define HAVE_FNMATCH_H 1 + +/* Define to 1 if you have the `ftruncate' function. */ +/* #undef HAVE_FTRUNCATE */ + +/* Define to 1 if you have the <inttypes.h> header file. */ +#define HAVE_INTTYPES_H 1 + +/* Define to 1 if you have the `mkstemp' function. */ +#define HAVE_MKSTEMP 1 + +/* Define to 1 if you have the `opendir' function. */ +#define HAVE_OPENDIR 1 + +/* Define to 1 if you have the `putenv' function. */ +/* #undef HAVE_PUTENV */ + +/* Define to 1 if you have the `regcomp' function. */ +#define HAVE_REGCOMP 1 +#define HAVE_REGEX 1 + +/* Define to 1 if you have the `remove' function. */ +#define HAVE_REMOVE 1 + +/* Define to 1 if you have the `setenv' function. */ +#define HAVE_SETENV 1 + +/* Define to 1 if you have the <stat.h> header file. */ +/* #undef HAVE_STAT_H */ + +/* Define this macro if the field "st_ino" exists in struct stat in + <sys/stat.h>. */ +#define HAVE_STAT_ST_INO 1 + +/* Define to 1 if you have the <stdint.h> header file. */ +#define HAVE_STDINT_H 1 + +/* Define to 1 if you have the <stdio.h> header file. */ +#define HAVE_STDIO_H 1 + +/* Define to 1 if you have the <stdlib.h> header file. */ +#define HAVE_STDLIB_H 1 + +/* Define to 1 if you have the `strerror' function. */ +#define HAVE_STRERROR 1 + +/* Define to 1 if you have the `stricmp' function. */ +/* #undef HAVE_STRICMP */ + +/* Define to 1 if you have the <strings.h> header file. */ +#define HAVE_STRINGS_H 1 + +/* Define to 1 if you have the <string.h> header file. */ +#define HAVE_STRING_H 1 + +/* Define to 1 if you have the `strnicmp' function. */ +/* #undef HAVE_STRNICMP */ + +/* Define to 1 if you have the `strstr' function. */ +#define HAVE_STRSTR 1 + +/* Define to 1 if you have the <sys/dir.h> header file. */ +#define HAVE_SYS_DIR_H 1 + +/* Define to 1 if you have the <sys/stat.h> header file. */ +#define HAVE_SYS_STAT_H 1 + +/* Define to 1 if you have the <sys/times.h> header file. 
*/ +#define HAVE_SYS_TIMES_H 1 + +/* Define to 1 if you have the <sys/types.h> header file. */ +#define HAVE_SYS_TYPES_H 1 + +/* Define to 1 if you have the `tempnam' function. */ +/* #undef HAVE_TEMPNAM */ + +/* Define to 1 if you have the `times' function. */ +/* #undef HAVE_TIMES */ + +/* Define to 1 if you have the <time.h> header file. */ +#define HAVE_TIME_H 1 + +/* Define to 1 if you have the `truncate' function. */ +#define HAVE_TRUNCATE 1 + +/* Define to 1 if you have the <types.h> header file. */ +/* #undef HAVE_TYPES_H */ + +/* Define to 1 if you have the <unistd.h> header file. */ +#define HAVE_UNISTD_H 1 + +/* Define to 1 if you have the `_findfirst' function. */ +/* #undef HAVE__FINDFIRST */ + +/* Define as the maximum integer on your system if not defined in <limits.h>. */ +/* #undef INT_MAX */ + +/* Define to the appropriate size for tmpnam() if <stdio.h> does not define + this. */ +#define L_tmpnam 20 + +/* Define this label if you want macro tags (defined labels) to use patterns + in the EX command by default (original ctags behavior is to use line + numbers). */ +/* #undef MACROS_USE_PATTERNS */ + +/* If you receive error or warning messages indicating that you are missing a + prototype for, or a type mismatch using, the following function, define + this label and remake. */ +/* #undef NEED_PROTO_FGETPOS */ + +/* If you receive error or warning messages indicating that you are missing a + prototype for, or a type mismatch using, the following function, define + this label and remake. */ +/* #undef NEED_PROTO_FTRUNCATE */ + +/* If you receive error or warning messages indicating that you are missing a + prototype for, or a type mismatch using, the following function, define + this label and remake. */ +/* #undef NEED_PROTO_GETENV */ + +/* If you receive error or warning messages indicating that you are missing a + prototype for, or a type mismatch using, the following function, define + this label and remake. 
*/ +/* #undef NEED_PROTO_LSTAT */ + +/* If you receive error or warning messages indicating that you are missing a + prototype for, or a type mismatch using, the following function, define + this label and remake. */ +/* #undef NEED_PROTO_MALLOC */ + +/* If you receive error or warning messages indicating that you are missing a + prototype for, or a type mismatch using, the following function, define + this label and remake. */ +/* #undef NEED_PROTO_REMOVE */ + +/* If you receive error or warning messages indicating that you are missing a + prototype for, or a type mismatch using, the following function, define + this label and remake. */ +/* #undef NEED_PROTO_STAT */ + +/* If you receive error or warning messages indicating that you are missing a + prototype for, or a type mismatch using, the following function, define + this label and remake. */ +/* #undef NEED_PROTO_TRUNCATE */ + +/* If you receive error or warning messages indicating that you are missing a + prototype for, or a type mismatch using, the following function, define + this label and remake. */ +/* #undef NEED_PROTO_UNLINK */ + +/* Define this if you have a prototype for putenv() in <stdlib.h>, but it doesn't + declare its argument as "const char *". */ +/* #undef NON_CONST_PUTENV_PROTOTYPE */ + +/* Package name. */ +/* #undef PACKAGE */ + +/* Define to the address where bug reports for this package should be sent. */ +#define PACKAGE_BUGREPORT "" + +/* Define to the full name of this package. */ +#define PACKAGE_NAME "" + +/* Define to the full name and version of this package. */ +#define PACKAGE_STRING "" + +/* Define to the one symbol short name of this package. */ +#define PACKAGE_TARNAME "" + +/* Define to the home page for this package. */ +#define PACKAGE_URL "" + +/* Define to the version of this package. */ +#define PACKAGE_VERSION "" + +/* Define this label if regcomp() is broken. 
*/ +/* #undef REGCOMP_BROKEN */ + +/* Define this value used by fseek() appropriately if <stdio.h> (or + <unistd.h> on SunOS 4.1.x) does not define it. */ +/* #undef SEEK_SET */ + +/* Define to 1 if all of the C90 standard headers exist (not just the ones + required in a freestanding environment). This macro is provided for + backward compatibility; new code need not use it. */ +#define STDC_HEADERS 1 + +/* Define this label if your system supports starting scripts with a line of + the form "#! /bin/sh" to select the interpreter to use for the script. */ +#define SYS_INTERPRETER 1 + +/* If you wish to change the directory in which temporary files are stored, + define this label to the directory desired. */ +#define TMPDIR "/tmp" + +/* Package version. */ +/* #undef VERSION */ + +/* Number of bits in a file offset, on hosts where this is settable. */ +/* #undef _FILE_OFFSET_BITS */ + +/* Define for large files, on AIX-style hosts. */ +/* #undef _LARGE_FILES */ + +/* This corrects the problem of missing prototypes for certain functions in + some GNU installations (e.g. SunOS 4.1.x). */ +/* #undef __USE_FIXED_PROTOTYPES__ */ + +/* Define to empty if `const' does not conform to ANSI C. */ +/* #undef const */ + +/* Define to long if <stdio.h> does not define this. */ +/* #undef fpos_t */ + +/* Define to `long int' if <sys/types.h> does not define. */ +/* #undef off_t */ + +/* Define remove to unlink if you have unlink(), but not remove(). */ +/* #undef remove */ + +/* Define to `unsigned int' if <sys/types.h> does not define. */ +/* #undef size_t */ diff --git a/third_party/ctags/ctags.h b/third_party/ctags/ctags.h new file mode 100644 index 000000000..40bb96dbd --- /dev/null +++ b/third_party/ctags/ctags.h @@ -0,0 +1,29 @@ +// clang-format off +/* +* $Id: ctags.h 702 2009-03-14 03:52:21Z dhiebert $ +* +* Copyright (c) 1996-2002, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. 
+* +* Program definitions +*/ +#ifndef _CTAGS_H +#define _CTAGS_H + +/* +* MACROS +*/ +#ifndef PROGRAM_VERSION +# define PROGRAM_VERSION "5.9~svn20110310" +#endif +#define PROGRAM_NAME "Exuberant Ctags" +#define PROGRAM_URL "http://ctags.sourceforge.net" +#define PROGRAM_COPYRIGHT "Copyright (C) 1996-2009" +#define AUTHOR_NAME "Darren Hiebert" +#define AUTHOR_EMAIL "dhiebert@users.sourceforge.net" + +#endif /* _CTAGS_H */ + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/third_party/ctags/ctags.mk b/third_party/ctags/ctags.mk new file mode 100644 index 000000000..a11107496 --- /dev/null +++ b/third_party/ctags/ctags.mk @@ -0,0 +1,61 @@ +#-*-mode:makefile-gmake;indent-tabs-mode:t;tab-width:8;coding:utf-8-*-┐ +#───vi: set et ft=make ts=8 tw=8 fenc=utf-8 :vi───────────────────────┘ + +PKGS += THIRD_PARTY_CTAGS + +THIRD_PARTY_CTAGS_ARTIFACTS += THIRD_PARTY_CTAGS_A +THIRD_PARTY_CTAGS = $(THIRD_PARTY_CTAGS_DEPS) $(THIRD_PARTY_CTAGS_A) +THIRD_PARTY_CTAGS_A = o/$(MODE)/third_party/ctags/ctags.a +THIRD_PARTY_CTAGS_FILES := $(wildcard third_party/ctags/*) +THIRD_PARTY_CTAGS_HDRS = $(filter %.h,$(THIRD_PARTY_CTAGS_FILES)) +THIRD_PARTY_CTAGS_INCS = $(filter %.inc,$(THIRD_PARTY_CTAGS_FILES)) +THIRD_PARTY_CTAGS_SRCS = $(filter %.c,$(THIRD_PARTY_CTAGS_FILES)) +THIRD_PARTY_CTAGS_OBJS = $(THIRD_PARTY_CTAGS_SRCS:%.c=o/$(MODE)/%.o) + +THIRD_PARTY_CTAGS_DIRECTDEPS = \ + LIBC_CALLS \ + LIBC_FMT \ + LIBC_INTRIN \ + LIBC_LOG \ + LIBC_MEM \ + LIBC_NEXGEN32E \ + LIBC_RUNTIME \ + LIBC_STDIO \ + LIBC_STR \ + LIBC_STUBS \ + LIBC_SYSV \ + THIRD_PARTY_MUSL \ + THIRD_PARTY_REGEX + +THIRD_PARTY_CTAGS_DEPS := \ + $(call uniq,$(foreach x,$(THIRD_PARTY_CTAGS_DIRECTDEPS),$($(x)))) + +THIRD_PARTY_CTAGS_CHECKS = \ + $(THIRD_PARTY_CTAGS_A).pkg \ + $(THIRD_PARTY_CTAGS_HDRS:%=o/$(MODE)/%.ok) + +$(THIRD_PARTY_CTAGS_A): \ + third_party/ctags/ \ + $(THIRD_PARTY_CTAGS_A).pkg \ + $(THIRD_PARTY_CTAGS_OBJS) + +$(THIRD_PARTY_CTAGS_A).pkg: \ + $(THIRD_PARTY_CTAGS_OBJS) \ + $(foreach 
x,$(THIRD_PARTY_CTAGS_DIRECTDEPS),$($(x)_A).pkg) + +o/$(MODE)/third_party/ctags/ctags.com.dbg: \ + $(THIRD_PARTY_CTAGS) \ + o/$(MODE)/third_party/ctags/main.o \ + $(CRT) \ + $(APE_NO_MODIFY_SELF) + @$(APELINK) + +THIRD_PARTY_CTAGS_LIBS = $(THIRD_PARTY_CTAGS_A) +THIRD_PARTY_CTAGS_BINS = $(THIRD_PARTY_CTAGS_COMS) $(THIRD_PARTY_CTAGS_COMS:%=%.dbg) +THIRD_PARTY_CTAGS_COMS = o/$(MODE)/third_party/ctags/ctags.com +$(THIRD_PARTY_CTAGS_OBJS): $(BUILD_FILES) third_party/ctags/ctags.mk + +.PHONY: o/$(MODE)/third_party/ctags +o/$(MODE)/third_party/ctags: \ + $(THIRD_PARTY_CTAGS_BINS) \ + $(THIRD_PARTY_CTAGS_CHECKS) diff --git a/third_party/ctags/debug.c b/third_party/ctags/debug.c new file mode 100644 index 000000000..cbba56c35 --- /dev/null +++ b/third_party/ctags/debug.c @@ -0,0 +1,114 @@ +// clang-format off +/* +* $Id: debug.c 558 2007-06-15 19:17:02Z elliotth $ +* +* Copyright (c) 1996-2002, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains debugging functions. +*/ + +/* +* INCLUDE FILES +*/ +#include "third_party/ctags/general.h" /* must always come first */ + +#include "libc/str/str.h" + + +#include "third_party/ctags/debug.h" +#include "third_party/ctags/options.h" +#include "third_party/ctags/read.h" + +/* +* FUNCTION DEFINITIONS +*/ + +#ifdef DEBUG + +extern void lineBreak (void) {} /* provides a line-specified break point */ + +extern void debugPrintf ( + const enum eDebugLevels level, const char *const format, ... 
) +{ + va_list ap; + + va_start (ap, format); + if (debug (level)) + vprintf (format, ap); + fflush (stdout); + va_end (ap); +} + +extern void debugPutc (const int level, const int c) +{ + if (debug (level) && c != EOF) + { + if (c == STRING_SYMBOL) printf ("\"string\""); + else if (c == CHAR_SYMBOL) printf ("'c'"); + else putchar (c); + + fflush (stdout); + } +} + +extern void debugParseNest (const boolean increase, const unsigned int level) +{ + debugPrintf (DEBUG_PARSE, "<*%snesting:%d*>", increase ? "++" : "--", level); +} + +extern void debugCppNest (const boolean begin, const unsigned int level) +{ + debugPrintf (DEBUG_CPP, "<*cpp:%s level %d*>", begin ? "begin":"end", level); +} + +extern void debugCppIgnore (const boolean ignore) +{ + debugPrintf (DEBUG_CPP, "<*cpp:%s ignore*>", ignore ? "begin":"end"); +} + +extern void debugEntry (const tagEntryInfo *const tag) +{ + const char *const scope = tag->isFileScope ? "{fs}" : ""; + + if (debug (DEBUG_PARSE)) + { + printf ("<#%s%s:%s", scope, tag->kindName, tag->name); + + if (tag->extensionFields.scope [0] != NULL && + tag->extensionFields.scope [1] != NULL) + printf (" [%s:%s]", tag->extensionFields.scope [0], + tag->extensionFields.scope [1]); + + if (Option.extensionFields.inheritance && + tag->extensionFields.inheritance != NULL) + printf (" [inherits:%s]", tag->extensionFields.inheritance); + + if (Option.extensionFields.fileScope && + tag->isFileScope && ! 
isHeaderFile ()) + printf (" [file:]"); + + if (Option.extensionFields.access && + tag->extensionFields.access != NULL) + printf (" [access:%s]", tag->extensionFields.access); + + if (Option.extensionFields.implementation && + tag->extensionFields.implementation != NULL) + printf (" [imp:%s]", tag->extensionFields.implementation); + + if (Option.extensionFields.typeRef && + tag->extensionFields.typeRef [0] != NULL && + tag->extensionFields.typeRef [1] != NULL) + printf (" [%s:%s]", tag->extensionFields.typeRef [0], + tag->extensionFields.typeRef [1]); + + printf ("#>"); + fflush (stdout); + } +} + +#endif + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/third_party/ctags/debug.h b/third_party/ctags/debug.h new file mode 100644 index 000000000..810c904b4 --- /dev/null +++ b/third_party/ctags/debug.h @@ -0,0 +1,71 @@ +// clang-format off +/* +* $Id: debug.h 558 2007-06-15 19:17:02Z elliotth $ +* +* Copyright (c) 1998-2002, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* External interface to debug.c +*/ +#ifndef _DEBUG_H +#define _DEBUG_H + +/* +* Include files +*/ +#include "third_party/ctags/general.h" /* must always come first */ + +#ifdef DEBUG +#include "libc/assert.h" +#endif +#include "third_party/ctags/entry.h" + +/* +* Macros +*/ + +#ifdef DEBUG +# define debug(level) ((Option.debugLevel & (long)(level)) != 0) +# define DebugStatement(x) x +# define PrintStatus(x) if (debug(DEBUG_STATUS)) printf x; +# define Assert(c) assert(c) +#else +# define DebugStatement(x) +# define PrintStatus(x) +# define Assert(c) +# ifndef NDEBUG +# define NDEBUG +# endif +#endif + +/* +* Data declarations +*/ + +/* Defines the debugging levels. 
+ */ +enum eDebugLevels { + DEBUG_READ = 0x01, /* echo raw (filtered) characters */ + DEBUG_PARSE = 0x02, /* echo parsing results */ + DEBUG_STATUS = 0x04, /* echo file status information */ + DEBUG_OPTION = 0x08, /* echo option parsing */ + DEBUG_CPP = 0x10, /* echo characters out of pre-processor */ + DEBUG_RAW = 0x20 /* echo raw (filtered) characters */ +}; + +/* +* Function prototypes +*/ +extern void lineBreak (void); +extern void debugPrintf (const enum eDebugLevels level, const char *const format, ...) __printf (2, 3); +extern void debugPutc (const int level, const int c); +extern void debugParseNest (const boolean increase, const unsigned int level); +extern void debugCppNest (const boolean begin, const unsigned int level); +extern void debugCppIgnore (const boolean ignore); +extern void debugEntry (const tagEntryInfo *const tag); + +#endif /* _DEBUG_H */ + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/third_party/ctags/dosbatch.c b/third_party/ctags/dosbatch.c new file mode 100644 index 000000000..27ddfac95 --- /dev/null +++ b/third_party/ctags/dosbatch.c @@ -0,0 +1,44 @@ +// clang-format off +/* +* $Id$ +* +* Copyright (c) 2009, David Fishburn +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains functions for generating tags for DOS Batch language files. 
+*/ + +/* +* INCLUDE FILES +*/ +#include "third_party/ctags/general.h" /* must always come first */ + +#include "libc/mem/alg.h" +#include "libc/str/str.h" +#include "third_party/ctags/parse.h" + +/* +* FUNCTION DEFINITIONS +*/ + +static void installDosBatchRegex (const langType language) +{ /* two rules: ":name" lines become label tags, "set name =" becomes a variable tag; both name the tag after capture group 1 */ + addTagRegex (language, + "^:([A-Za-z_0-9]+)", "\\1", "l,label,labels", NULL); + addTagRegex (language, + "set[ \t]+([A-Za-z_0-9]+)[ \t]*=", "\\1", "v,variable,variables", NULL); +} + +extern parserDefinition* DosBatchParser (void) /* prototype form, matching the other parser constructors */ +{ + static const char *const extensions [] = { "bat", "cmd", NULL }; + parserDefinition* const def = parserNew ("DosBatch"); + def->extensions = extensions; + def->initialize = installDosBatchRegex; + def->regex = TRUE; + return def; +} + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/third_party/ctags/eiffel.c b/third_party/ctags/eiffel.c new file mode 100644 index 000000000..4a52745bc --- /dev/null +++ b/third_party/ctags/eiffel.c @@ -0,0 +1,1373 @@ +// clang-format off +/* +* $Id: eiffel.c 748 2009-11-06 02:44:42Z dhiebert $ +* +* Copyright (c) 1998-2002, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains functions for generating tags for Eiffel language +* files. 
+*/ + +/* +* INCLUDE FILES +*/ +#include "libc/fmt/conv.h" +#include "third_party/ctags/general.h" /* must always come first */ + +#ifdef TYPE_REFERENCE_TOOL +#include "libc/calls/calls.h" +#include "libc/calls/dprintf.h" +#include "libc/calls/weirdtypes.h" +#include "libc/fmt/fmt.h" +#include "libc/mem/fmt.h" +#include "libc/stdio/stdio.h" +#include "libc/stdio/temp.h" +#include "third_party/musl/tempnam.h" +#endif +#include "libc/mem/alg.h" +#include "libc/str/str.h" +#include "libc/limits.h" +#include "libc/sysv/consts/_posix.h" +#include "libc/sysv/consts/iov.h" +#include "libc/sysv/consts/limits.h" +#include "libc/sysv/consts/xopen.h" +#include "libc/thread/thread.h" +#include "libc/str/str.h" /* to define tolower () */ +#include "libc/runtime/runtime.h" + +#include "third_party/ctags/debug.h" +#include "third_party/ctags/keyword.h" +#include "third_party/ctags/routines.h" +#include "third_party/ctags/vstring.h" +#ifndef TYPE_REFERENCE_TOOL +#include "third_party/ctags/entry.h" +#include "third_party/ctags/options.h" +#include "third_party/ctags/parse.h" +#include "third_party/ctags/read.h" +#endif + +/* +* MACROS +*/ +#define isident(c) (isalnum(c) || (c) == '_') +#define isFreeOperatorChar(c) ((c) == '@' || (c) == '#' || \ + (c) == '|' || (c) == '&') +#define isType(token,t) (boolean) ((token)->type == (t)) +#define isKeyword(token,k) (boolean) ((token)->keyword == (k)) + +/* +* DATA DECLARATIONS +*/ + +typedef enum eException { ExceptionNone, ExceptionEOF } exception_t; + +/* Used to specify type of keyword. 
+ */ +typedef enum eKeywordId { + KEYWORD_NONE = -1, + KEYWORD_alias, KEYWORD_all, KEYWORD_and, + KEYWORD_as, KEYWORD_assign, KEYWORD_attached, + KEYWORD_check, KEYWORD_class, KEYWORD_convert, KEYWORD_create, + KEYWORD_creation, KEYWORD_Current, + KEYWORD_debug, KEYWORD_deferred, KEYWORD_detachable, KEYWORD_do, + KEYWORD_else, KEYWORD_elseif, KEYWORD_end, KEYWORD_ensure, + KEYWORD_expanded, KEYWORD_export, KEYWORD_external, + KEYWORD_false, KEYWORD_feature, KEYWORD_from, KEYWORD_frozen, + KEYWORD_if, KEYWORD_implies, + KEYWORD_indexing, KEYWORD_infix, KEYWORD_inherit, KEYWORD_inspect, + KEYWORD_invariant, KEYWORD_is, KEYWORD_like, KEYWORD_local, + KEYWORD_loop, KEYWORD_not, KEYWORD_obsolete, KEYWORD_old, KEYWORD_once, + KEYWORD_or, KEYWORD_prefix, KEYWORD_redefine, KEYWORD_rename, + KEYWORD_require, KEYWORD_rescue, KEYWORD_Result, KEYWORD_retry, + KEYWORD_select, KEYWORD_separate, KEYWORD_strip, KEYWORD_then, + KEYWORD_true, KEYWORD_undefine, KEYWORD_unique, KEYWORD_until, + KEYWORD_variant, KEYWORD_when, KEYWORD_xor +} keywordId; + +/* Used to determine whether keyword is valid for the token language and + * what its ID is. 
+ */ +typedef struct sKeywordDesc { + const char *name; + keywordId id; +} keywordDesc; + +typedef enum eTokenType { + TOKEN_UNDEFINED, + TOKEN_BANG, + TOKEN_CHARACTER, + TOKEN_CLOSE_BRACE, + TOKEN_CLOSE_BRACKET, + TOKEN_CLOSE_PAREN, + TOKEN_COLON, + TOKEN_COMMA, + TOKEN_CONSTRAINT, + TOKEN_DOT, + TOKEN_DOLLAR, + TOKEN_IDENTIFIER, + TOKEN_KEYWORD, + TOKEN_NUMERIC, + TOKEN_OPEN_BRACE, + TOKEN_OPEN_BRACKET, + TOKEN_OPEN_PAREN, + TOKEN_OPERATOR, + TOKEN_OTHER, + TOKEN_QUESTION, + TOKEN_SEMICOLON, + TOKEN_SEPARATOR, + TOKEN_STRING, + TOKEN_TILDE +} tokenType; + +typedef struct sTokenInfo { + tokenType type; + keywordId keyword; + boolean isExported; + vString* string; + vString* className; + vString* featureName; +} tokenInfo; + +/* +* DATA DEFINITIONS +*/ + +static langType Lang_eiffel; + +#ifdef TYPE_REFERENCE_TOOL + +static const char *FileName; +static FILE *File; +static int PrintClass; +static int PrintReferences; +static int SelfReferences; +static int Debug; +static stringList *GenericNames; +static stringList *ReferencedTypes; + +#else + +typedef enum { + EKIND_CLASS, EKIND_FEATURE, EKIND_LOCAL, EKIND_QUALIFIED_TAGS +} eiffelKind; + +static kindOption EiffelKinds [] = { + { TRUE, 'c', "class", "classes"}, + { TRUE, 'f', "feature", "features"}, + { FALSE, 'l', "local", "local entities"} +}; + +#endif + +static jmp_buf Exception; + +static const keywordDesc EiffelKeywordTable [] = { + /* keyword keyword ID */ + { "alias", KEYWORD_alias }, + { "all", KEYWORD_all }, + { "and", KEYWORD_and }, + { "as", KEYWORD_as }, + { "assign", KEYWORD_assign }, + { "attached", KEYWORD_attached }, + { "check", KEYWORD_check }, + { "class", KEYWORD_class }, + { "convert", KEYWORD_convert }, + { "create", KEYWORD_create }, + { "creation", KEYWORD_creation }, + { "current", KEYWORD_Current }, + { "debug", KEYWORD_debug }, + { "deferred", KEYWORD_deferred }, + { "detachable", KEYWORD_detachable }, + { "do", KEYWORD_do }, + { "else", KEYWORD_else }, + { "elseif", KEYWORD_elseif }, + { 
"end", KEYWORD_end }, + { "ensure", KEYWORD_ensure }, + { "expanded", KEYWORD_expanded }, + { "export", KEYWORD_export }, + { "external", KEYWORD_external }, + { "false", KEYWORD_false }, + { "feature", KEYWORD_feature }, + { "from", KEYWORD_from }, + { "frozen", KEYWORD_frozen }, + { "if", KEYWORD_if }, + { "implies", KEYWORD_implies }, + { "indexing", KEYWORD_indexing }, + { "infix", KEYWORD_infix }, + { "inherit", KEYWORD_inherit }, + { "inspect", KEYWORD_inspect }, + { "invariant", KEYWORD_invariant }, + { "is", KEYWORD_is }, + { "like", KEYWORD_like }, + { "local", KEYWORD_local }, + { "loop", KEYWORD_loop }, + { "not", KEYWORD_not }, + { "obsolete", KEYWORD_obsolete }, + { "old", KEYWORD_old }, + { "once", KEYWORD_once }, + { "or", KEYWORD_or }, + { "prefix", KEYWORD_prefix }, + { "redefine", KEYWORD_redefine }, + { "rename", KEYWORD_rename }, + { "require", KEYWORD_require }, + { "rescue", KEYWORD_rescue }, + { "result", KEYWORD_Result }, + { "retry", KEYWORD_retry }, + { "select", KEYWORD_select }, + { "separate", KEYWORD_separate }, + { "strip", KEYWORD_strip }, + { "then", KEYWORD_then }, + { "true", KEYWORD_true }, + { "undefine", KEYWORD_undefine }, + { "unique", KEYWORD_unique }, + { "until", KEYWORD_until }, + { "variant", KEYWORD_variant }, + { "when", KEYWORD_when }, + { "xor", KEYWORD_xor } +}; + +/* +* FUNCTION DEFINITIONS +*/ + +static void buildEiffelKeywordHash (void) +{ + const size_t count = sizeof (EiffelKeywordTable) / + sizeof (EiffelKeywordTable [0]); + size_t i; + for (i = 0 ; i < count ; ++i) + { + const keywordDesc* const p = &EiffelKeywordTable [i]; + addKeyword (p->name, Lang_eiffel, (int) p->id); + } +} + +#ifdef TYPE_REFERENCE_TOOL + +static void addGenericName (tokenInfo *const token) +{ + vStringUpper (token->string); + if (vStringLength (token->string) > 0) + stringListAdd (GenericNames, vStringNewCopy (token->string)); +} + +static boolean isGeneric (tokenInfo *const token) +{ + return (boolean) stringListHas (GenericNames, 
vStringValue (token->string)); +} + +static void reportType (tokenInfo *const token) +{ + vStringUpper (token->string); + if (vStringLength (token->string) > 0 && ! isGeneric (token) && + (SelfReferences || strcmp (vStringValue ( + token->string), vStringValue (token->className)) != 0) && + ! stringListHas (ReferencedTypes, vStringValue (token->string))) + { + printf ("%s\n", vStringValue (token->string)); + stringListAdd (ReferencedTypes, vStringNewCopy (token->string)); + } +} + +static int fileGetc (void) +{ + int c = getc (File); + if (c == '\r') + { + c = getc (File); + if (c != '\n') + { + ungetc (c, File); + c = '\n'; + } + } + if (Debug > 0 && c != EOF) + putc (c, errout); + return c; +} + +static int fileUngetc (c) +{ + return ungetc (c, File); +} + +extern char *readLine (vString *const vLine, FILE *const fp) +{ + return NULL; +} + +#else + +/* +* Tag generation functions +*/ + +static void makeEiffelClassTag (tokenInfo *const token) +{ + if (EiffelKinds [EKIND_CLASS].enabled) + { + const char *const name = vStringValue (token->string); + tagEntryInfo e; + + initTagEntry (&e, name); + + e.kindName = EiffelKinds [EKIND_CLASS].name; + e.kind = EiffelKinds [EKIND_CLASS].letter; + + makeTagEntry (&e); + } + vStringCopy (token->className, token->string); +} + +static void makeEiffelFeatureTag (tokenInfo *const token) +{ + if (EiffelKinds [EKIND_FEATURE].enabled && + (token->isExported || Option.include.fileScope)) + { + const char *const name = vStringValue (token->string); + tagEntryInfo e; + + initTagEntry (&e, name); + + e.isFileScope = (boolean) (! 
token->isExported); + e.kindName = EiffelKinds [EKIND_FEATURE].name; + e.kind = EiffelKinds [EKIND_FEATURE].letter; + e.extensionFields.scope [0] = EiffelKinds [EKIND_CLASS].name; + e.extensionFields.scope [1] = vStringValue (token->className); + + makeTagEntry (&e); + + if (Option.include.qualifiedTags) + { + vString* qualified = vStringNewInit (vStringValue (token->className)); + vStringPut (qualified, '.'); + vStringCat (qualified, token->string); + e.name = vStringValue (qualified); + makeTagEntry (&e); + vStringDelete (qualified); + } + } + vStringCopy (token->featureName, token->string); +} + +static void makeEiffelLocalTag (tokenInfo *const token) +{ + if (EiffelKinds [EKIND_LOCAL].enabled && Option.include.fileScope) + { + const char *const name = vStringValue (token->string); + vString* scope = vStringNew (); + tagEntryInfo e; + + initTagEntry (&e, name); + + e.isFileScope = TRUE; + e.kindName = EiffelKinds [EKIND_LOCAL].name; + e.kind = EiffelKinds [EKIND_LOCAL].letter; + + vStringCopy (scope, token->className); + vStringPut (scope, '.'); + vStringCat (scope, token->featureName); + + e.extensionFields.scope [0] = EiffelKinds [EKIND_FEATURE].name; + e.extensionFields.scope [1] = vStringValue (scope); + + makeTagEntry (&e); + vStringDelete (scope); + } +} + +#endif + +/* +* Parsing functions +*/ + +static int skipToCharacter (const int c) +{ + int d; + + do + { + d = fileGetc (); + } while (d != EOF && d != c); + + return d; +} + +/* If a numeric is passed in 'c', this is used as the first digit of the + * numeric being parsed. + */ +static vString *parseInteger (int c) +{ + vString *string = vStringNew (); + + if (c == '\0') + c = fileGetc (); + if (c == '-') + { + vStringPut (string, c); + c = fileGetc (); + } + else if (! 
isdigit (c)) + c = fileGetc (); + while (c != EOF && (isdigit (c) || c == '_')) + { + vStringPut (string, c); + c = fileGetc (); + } + vStringTerminate (string); + fileUngetc (c); + + return string; +} + +static vString *parseNumeric (int c) +{ + vString *string = vStringNew (); + vString *integer = parseInteger (c); + vStringCopy (string, integer); + vStringDelete (integer); + + c = fileGetc (); + if (c == '.') + { + integer = parseInteger ('\0'); + vStringPut (string, c); + vStringCat (string, integer); + vStringDelete (integer); + c = fileGetc (); + } + if (tolower (c) == 'e') + { + integer = parseInteger ('\0'); + vStringPut (string, c); + vStringCat (string, integer); + vStringDelete (integer); + } + else if (!isspace (c)) + fileUngetc (c); + + vStringTerminate (string); + + return string; +} + +static int parseEscapedCharacter (void) +{ + int d = '\0'; + int c = fileGetc (); + + switch (c) + { + case 'A': d = '@'; break; + case 'B': d = '\b'; break; + case 'C': d = '^'; break; + case 'D': d = '$'; break; + case 'F': d = '\f'; break; + case 'H': d = '\\'; break; + case 'L': d = '~'; break; + case 'N': d = '\n'; break; +#ifdef QDOS + case 'Q': d = 0x9F; break; +#else + case 'Q': d = '`'; break; +#endif + case 'R': d = '\r'; break; + case 'S': d = '#'; break; + case 'T': d = '\t'; break; + case 'U': d = '\0'; break; + case 'V': d = '|'; break; + case '%': d = '%'; break; + case '\'': d = '\''; break; + case '"': d = '"'; break; + case '(': d = '['; break; + case ')': d = ']'; break; + case '<': d = '{'; break; + case '>': d = '}'; break; + + case '\n': skipToCharacter ('%'); break; + + case '/': + { + vString *string = parseInteger ('\0'); + const char *value = vStringValue (string); + const unsigned long ascii = atol (value); + vStringDelete (string); + + c = fileGetc (); + if (c == '/' && ascii < 256) + d = ascii; + break; + } + + default: break; + } + return d; +} + +static int parseCharacter (void) +{ + int c = fileGetc (); + int result = c; + + if (c == 
'%') + result = parseEscapedCharacter (); + + c = fileGetc (); + if (c != '\'') + skipToCharacter ('\n'); + + return result; +} + +static void parseString (vString *const string) +{ + boolean verbatim = FALSE; + boolean align = FALSE; + boolean end = FALSE; + vString *verbatimCloser = vStringNew (); + vString *lastLine = vStringNew (); + int prev = '\0'; + int c; + + while (! end) + { + c = fileGetc (); + if (c == EOF) + end = TRUE; + else if (c == '"') + { + if (! verbatim) + end = TRUE; + else + end = (boolean) (strcmp (vStringValue (lastLine), + vStringValue (verbatimCloser)) == 0); + } + else if (c == '\n') + { + if (verbatim) + vStringClear (lastLine); + if (prev == '[' /* || prev == '{' */) + { + verbatim = TRUE; + vStringClear (verbatimCloser); + vStringClear (lastLine); + if (prev == '{') + vStringPut (verbatimCloser, '}'); + else + { + vStringPut (verbatimCloser, ']'); + align = TRUE; + } + vStringNCat (verbatimCloser, string, vStringLength (string) - 1); + vStringClear (string); + } + if (verbatim && align) + { + do + c = fileGetc (); + while (isspace (c)); + } + } + else if (c == '%') + c = parseEscapedCharacter (); + if (! end) + { + vStringPut (string, c); + if (verbatim) + { + vStringPut (lastLine, c); + vStringTerminate (lastLine); + } + prev = c; + } + } + vStringTerminate (string); + vStringDelete (lastLine); + vStringDelete (verbatimCloser); +} + +/* Read a C identifier beginning with "firstChar" and places it into "name". 
+ */ +static void parseIdentifier (vString *const string, const int firstChar) +{ + int c = firstChar; + + do + { + vStringPut (string, c); + c = fileGetc (); + } while (isident (c)); + + vStringTerminate (string); + if (!isspace (c)) + fileUngetc (c); /* unget non-identifier character */ +} + +static void parseFreeOperator (vString *const string, const int firstChar) +{ + int c = firstChar; + + do + { + vStringPut (string, c); + c = fileGetc (); + } while (c > ' '); + + vStringTerminate (string); + if (!isspace (c)) + fileUngetc (c); /* unget non-identifier character */ +} + +static void copyToken (tokenInfo* dst, const tokenInfo *src) +{ + dst->type = src->type; + dst->keyword = src->keyword; + dst->isExported = src->isExported; + + vStringCopy (dst->string, src->string); + vStringCopy (dst->className, src->className); + vStringCopy (dst->featureName, src->featureName); +} + +static tokenInfo *newToken (void) +{ + tokenInfo *const token = xMalloc (1, tokenInfo); + + token->type = TOKEN_UNDEFINED; + token->keyword = KEYWORD_NONE; + token->isExported = TRUE; + + token->string = vStringNew (); + token->className = vStringNew (); + token->featureName = vStringNew (); + + return token; +} + +static void deleteToken (tokenInfo *const token) +{ + vStringDelete (token->string); + vStringDelete (token->className); + vStringDelete (token->featureName); + + eFree (token); +} + +static void readToken (tokenInfo *const token) +{ + int c; + + token->type = TOKEN_UNDEFINED; + token->keyword = KEYWORD_NONE; + vStringClear (token->string); + +getNextChar: + + do + c = fileGetc (); + while (c == '\t' || c == ' ' || c == '\n'); + + switch (c) + { + case EOF: longjmp (Exception, (int)ExceptionEOF); break; + case ';': token->type = TOKEN_SEMICOLON; break; + case '!': token->type = TOKEN_BANG; break; + case '}': token->type = TOKEN_CLOSE_BRACE; break; + case ']': token->type = TOKEN_CLOSE_BRACKET; break; + case ')': token->type = TOKEN_CLOSE_PAREN; break; + case ',': token->type = 
TOKEN_COMMA; break; + case '$': token->type = TOKEN_DOLLAR; break; + case '.': token->type = TOKEN_DOT; break; + case '{': token->type = TOKEN_OPEN_BRACE; break; + case '[': token->type = TOKEN_OPEN_BRACKET; break; + case '(': token->type = TOKEN_OPEN_PAREN; break; + case '~': token->type = TOKEN_TILDE; break; + + + case '+': + case '*': + case '^': + case '=': token->type = TOKEN_OPERATOR; break; + + case '-': + c = fileGetc (); + if (c == '>') + token->type = TOKEN_CONSTRAINT; + else if (c == '-') /* is this the start of a comment? */ + { + skipToCharacter ('\n'); + goto getNextChar; + } + else + { + if (!isspace (c)) + fileUngetc (c); + token->type = TOKEN_OPERATOR; + } + break; + + case '?': + case ':': + { + int c2 = fileGetc (); + if (c2 == '=') + token->type = TOKEN_OPERATOR; + else + { + if (!isspace (c2)) + fileUngetc (c2); + if (c == ':') + token->type = TOKEN_COLON; + else + token->type = TOKEN_QUESTION; + } + break; + } + + case '<': + c = fileGetc (); + if (c != '=' && c != '>' && !isspace (c)) + fileUngetc (c); + token->type = TOKEN_OPERATOR; + break; + + case '>': + c = fileGetc (); + if (c != '=' && c != '>' && !isspace (c)) + fileUngetc (c); + token->type = TOKEN_OPERATOR; + break; + + case '/': + c = fileGetc (); + if (c != '/' && c != '=' && !isspace (c)) + fileUngetc (c); + token->type = TOKEN_OPERATOR; + break; + + case '\\': + c = fileGetc (); + if (c != '\\' && !isspace (c)) + fileUngetc (c); + token->type = TOKEN_OPERATOR; + break; + + case '"': + token->type = TOKEN_STRING; + parseString (token->string); + break; + + case '\'': + token->type = TOKEN_CHARACTER; + parseCharacter (); + break; + + default: + if (isalpha (c)) + { + parseIdentifier (token->string, c); + token->keyword = analyzeToken (token->string, Lang_eiffel); + if (isKeyword (token, KEYWORD_NONE)) + token->type = TOKEN_IDENTIFIER; + else + token->type = TOKEN_KEYWORD; + } + else if (isdigit (c)) + { + vString* numeric = parseNumeric (c); + vStringCat (token->string, numeric); 
+ vStringDelete (numeric); + token->type = TOKEN_NUMERIC; + } + else if (isFreeOperatorChar (c)) + { + parseFreeOperator (token->string, c); + token->type = TOKEN_OPERATOR; + } + else + { + token->type = TOKEN_UNDEFINED; + Assert (! isType (token, TOKEN_UNDEFINED)); + } + break; + } +} + +/* +* Scanning functions +*/ + +static boolean isIdentifierMatch ( + const tokenInfo *const token, const char *const name) +{ + return (boolean) (isType (token, TOKEN_IDENTIFIER) && + strcasecmp (vStringValue (token->string), name) == 0); +} + +static void findToken (tokenInfo *const token, const tokenType type) +{ + while (! isType (token, type)) + readToken (token); +} + +static void findKeyword (tokenInfo *const token, const keywordId keyword) +{ + while (! isKeyword (token, keyword)) + readToken (token); +} + +static boolean parseType (tokenInfo *const token); + +static void parseGeneric (tokenInfo *const token, boolean declaration __unused) +{ + unsigned int depth = 0; +#ifdef TYPE_REFERENCE_TOOL + boolean constraint = FALSE; +#endif + Assert (isType (token, TOKEN_OPEN_BRACKET)); + do + { + if (isType (token, TOKEN_OPEN_BRACKET)) + { + ++depth; + readToken (token); + } + else if (isType (token, TOKEN_CLOSE_BRACKET)) + { + --depth; + readToken (token); + } +#ifdef TYPE_REFERENCE_TOOL + else if (declaration) + { + boolean advanced = FALSE; + if (depth == 1) + { + if (isType (token, TOKEN_CONSTRAINT)) + constraint = TRUE; + else if (isKeyword (token, KEYWORD_create)) + findKeyword (token, KEYWORD_end); + else if (isType (token, TOKEN_IDENTIFIER)) + { + if (constraint) + advanced = parseType (token); + else + addGenericName (token); + constraint = FALSE; + } + } + else if (isType (token, TOKEN_IDENTIFIER)) + advanced = parseType (token); + if (! 
advanced) + readToken (token); + } +#endif + else + parseType (token); + } while (depth > 0); +} + +static boolean parseType (tokenInfo *const token) +{ + tokenInfo* const id = newToken (); + copyToken (id, token); + readToken (token); + if (isType (token, TOKEN_COLON)) /* check for "{entity: TYPE}" */ + { + readToken (id); + readToken (token); + } + if (isKeyword (id, KEYWORD_like)) + { + if (isType (token, TOKEN_IDENTIFIER) || + isKeyword (token, KEYWORD_Current)) + readToken (token); + } + else + { + if (isKeyword (id, KEYWORD_attached) || + isKeyword (id, KEYWORD_detachable) || + isKeyword (id, KEYWORD_expanded)) + { + copyToken (id, token); + readToken (token); + } + if (isType (id, TOKEN_IDENTIFIER)) + { +#ifdef TYPE_REFERENCE_TOOL + reportType (id); +#endif + if (isType (token, TOKEN_OPEN_BRACKET)) + parseGeneric (token, FALSE); + else if ((strcmp ("BIT", vStringValue (id->string)) == 0)) + readToken (token); /* read token after number of bits */ + } + } + deleteToken (id); + return TRUE; +} + +static void parseEntityType (tokenInfo *const token) +{ + Assert (isType (token, TOKEN_COLON)); + readToken (token); + + if (isType (token, TOKEN_BANG) || isType (token, TOKEN_QUESTION)) + readToken (token); /* skip over '!' or '?' */ + parseType (token); +} + +static void parseLocal (tokenInfo *const token) +{ + Assert (isKeyword (token, KEYWORD_local)); + readToken (token); + + /* Check keyword first in case local clause is empty + */ + while (! isKeyword (token, KEYWORD_do) && + ! 
isKeyword (token, KEYWORD_once)) + { +#ifndef TYPE_REFERENCE_TOOL + if (isType (token, TOKEN_IDENTIFIER)) + makeEiffelLocalTag (token); +#endif + readToken (token); + if (isType (token, TOKEN_COLON)) + parseEntityType (token); + } +} + +static void findFeatureEnd (tokenInfo *const token) +{ + boolean isFound = isKeyword (token, KEYWORD_is); + if (isFound) + readToken (token); + switch (token->keyword) + { + case KEYWORD_deferred: + case KEYWORD_do: + case KEYWORD_external: + case KEYWORD_local: + case KEYWORD_obsolete: + case KEYWORD_once: + case KEYWORD_require: + { + int depth = 1; + + while (depth > 0) + { +#ifdef TYPE_REFERENCE_TOOL + if (isType (token, TOKEN_OPEN_BRACE)) + { + readToken (token); + if (isType (token, TOKEN_IDENTIFIER)) + parseType (token); + } + else if (isType (token, TOKEN_BANG)) + { + readToken (token); + if (isType (token, TOKEN_IDENTIFIER)) + parseType (token); + if (isType (token, TOKEN_BANG)) + readToken (token); + } + else +#endif + switch (token->keyword) + { + case KEYWORD_check: + case KEYWORD_debug: + case KEYWORD_from: + case KEYWORD_if: + case KEYWORD_inspect: + ++depth; + break; + + case KEYWORD_local: + parseLocal (token); + break; + + case KEYWORD_end: + --depth; + break; + + default: + break; + } + readToken (token); + } + break; + } + + default: + /* is this a manifest constant? 
*/ + if (isFound || isType (token, TOKEN_OPERATOR)) { + if (isType (token, TOKEN_OPERATOR)) + readToken (token); + readToken (token); + } + break; + } +} + +static boolean readFeatureName (tokenInfo *const token) +{ + boolean isFeatureName = FALSE; + + if (isKeyword (token, KEYWORD_frozen)) + readToken (token); + if (isType (token, TOKEN_IDENTIFIER)) + isFeatureName = TRUE; + else if (isKeyword (token, KEYWORD_assign)) /* legacy code */ + isFeatureName = TRUE; + else if (isKeyword (token, KEYWORD_infix) || + isKeyword (token, KEYWORD_prefix)) + { + readToken (token); + if (isType (token, TOKEN_STRING)) + isFeatureName = TRUE; + } + return isFeatureName; +} + +static void parseArguments (tokenInfo *const token) +{ +#ifndef TYPE_REFERENCE_TOOL + findToken (token, TOKEN_CLOSE_PAREN); + readToken (token); +#else + Assert (isType (token, TOKEN_OPEN_PAREN)); + readToken (token); + do + { + if (isType (token, TOKEN_COLON)) + parseEntityType (token); + else + readToken (token); + } while (! isType (token, TOKEN_CLOSE_PAREN)); + readToken (token); +#endif +} + +static boolean parseFeature (tokenInfo *const token) +{ + boolean found = FALSE; + while (readFeatureName (token)) + { + found = TRUE; +#ifndef TYPE_REFERENCE_TOOL + makeEiffelFeatureTag (token); +#endif + readToken (token); + if (isType (token, TOKEN_COMMA)) + readToken (token); + } + if (found) + { + if (isKeyword (token, KEYWORD_alias)) { + readToken (token); +#ifndef TYPE_REFERENCE_TOOL + if (isType (token, TOKEN_STRING)) + makeEiffelFeatureTag (token); +#endif + readToken (token); + } + if (isType (token, TOKEN_OPEN_PAREN)) /* arguments? */ + parseArguments (token); + if (isType (token, TOKEN_COLON)) /* a query? 
*/ + parseEntityType (token); + if (isKeyword (token, KEYWORD_assign)) + { + readToken (token); + readToken (token); + } + if (isKeyword (token, KEYWORD_obsolete)) + { + readToken (token); + if (isType (token, TOKEN_STRING)) + readToken (token); + } + findFeatureEnd (token); + } + return found; +} + +static void parseExport (tokenInfo *const token) +{ + token->isExported = TRUE; + readToken (token); + if (isType (token, TOKEN_OPEN_BRACE)) + { + token->isExported = FALSE; + while (! isType (token, TOKEN_CLOSE_BRACE)) + { + if (isType (token, TOKEN_IDENTIFIER)) + token->isExported |= !isIdentifierMatch (token, "NONE"); + readToken (token); + } + readToken (token); + } +} + +static void parseFeatureClauses (tokenInfo *const token) +{ + Assert (isKeyword (token, KEYWORD_feature)); + do + { + if (isKeyword (token, KEYWORD_feature)) + parseExport (token); + if (! isKeyword (token, KEYWORD_feature) && + ! isKeyword (token, KEYWORD_invariant) && + ! isKeyword (token, KEYWORD_indexing)) + { + if (! parseFeature (token)) + readToken (token); + } + } while (! isKeyword (token, KEYWORD_end) && + ! isKeyword (token, KEYWORD_invariant) && + ! 
isKeyword (token, KEYWORD_indexing)); +} + +static void parseRename (tokenInfo *const token) +{ + Assert (isKeyword (token, KEYWORD_rename)); + do { + readToken (token); + if (readFeatureName (token)) + { + readToken (token); + if (isKeyword (token, KEYWORD_as)) + { + readToken (token); + if (readFeatureName (token)) + { +#ifndef TYPE_REFERENCE_TOOL + makeEiffelFeatureTag (token); /* renamed feature */ +#endif + readToken (token); + } + } + } + } while (isType (token, TOKEN_COMMA)); +} + +static void parseInherit (tokenInfo *const token) +{ + Assert (isKeyword (token, KEYWORD_inherit)); + readToken (token); + while (isType (token, TOKEN_IDENTIFIER)) + { + parseType (token); + if (isType (token, TOKEN_KEYWORD)) + { + switch (token->keyword) /* check for feature adaptation */ + { + case KEYWORD_rename: + parseRename (token); + case KEYWORD_export: + case KEYWORD_undefine: + case KEYWORD_redefine: + case KEYWORD_select: + findKeyword (token, KEYWORD_end); + readToken (token); + break; + + case KEYWORD_end: + readToken (token); + break; + + default: break; + } + } + if (isType (token, TOKEN_SEMICOLON)) + readToken (token); + } +} + +static void parseConvert (tokenInfo *const token) +{ + Assert (isKeyword (token, KEYWORD_convert)); + do + { + readToken (token); + if (! isType (token, TOKEN_IDENTIFIER)) + break; + else if (isType (token, TOKEN_OPEN_PAREN)) + { + while (! isType (token, TOKEN_CLOSE_PAREN)) + readToken (token); + } + else if (isType (token, TOKEN_COLON)) + { + readToken (token); + if (! isType (token, TOKEN_OPEN_BRACE)) + break; + else while (! 
isType (token, TOKEN_CLOSE_BRACE)) + readToken (token); + } + } while (isType (token, TOKEN_COMMA)); +} + +static void parseClass (tokenInfo *const token) +{ + Assert (isKeyword (token, KEYWORD_class)); + readToken (token); + if (isType (token, TOKEN_IDENTIFIER)) + { +#ifndef TYPE_REFERENCE_TOOL + makeEiffelClassTag (token); + readToken (token); +#else + vStringCopy (token->className, token->string); + vStringUpper (token->className); + if (PrintClass) + puts (vStringValue (token->className)); + if (! PrintReferences) + exit (0); + readToken (token); +#endif + } + + do + { + if (isType (token, TOKEN_OPEN_BRACKET)) + parseGeneric (token, TRUE); + else if (! isType (token, TOKEN_KEYWORD)) + readToken (token); + else switch (token->keyword) + { + case KEYWORD_inherit: parseInherit (token); break; + case KEYWORD_feature: parseFeatureClauses (token); break; + case KEYWORD_convert: parseConvert (token); break; + default: readToken (token); break; + } + } while (! isKeyword (token, KEYWORD_end)); +} + +static void initialize (const langType language) +{ + Lang_eiffel = language; + buildEiffelKeywordHash (); +} + +static void findEiffelTags (void) +{ + tokenInfo *const token = newToken (); + exception_t exception; + + exception = (exception_t) (setjmp (Exception)); + while (exception == ExceptionNone) + { + findKeyword (token, KEYWORD_class); + parseClass (token); + } + deleteToken (token); +} + +#ifndef TYPE_REFERENCE_TOOL + +extern parserDefinition* EiffelParser (void) +{ + static const char *const extensions [] = { "e", NULL }; + parserDefinition* def = parserNew ("Eiffel"); + def->kinds = EiffelKinds; + def->kindCount = KIND_COUNT (EiffelKinds); + def->extensions = extensions; + def->parser = findEiffelTags; + def->initialize = initialize; + return def; +} + +#else + +static void findReferences (void) +{ + ReferencedTypes = stringListNew (); + GenericNames = stringListNew (); + initialize (0); + + findEiffelTags (); + + stringListDelete (GenericNames); + GenericNames = 
NULL; + stringListDelete (ReferencedTypes); + ReferencedTypes = NULL; +} + +static const char *const Usage = + "Prints names of types referenced by an Eiffel language file.\n" + "\n" + "Usage: %s [-cdrs] [file_name | -]\n" + "\n" + "Options:\n" + " -c Print class name of current file (on first line of output).\n" + " -d Enable debug output.\n" + " -r Print types referenced by current file (default unless -c).\n" + " -s Include self-references.\n" + "\n"; + +extern int main (int argc, char** argv) +{ + int i; + for (i = 1 ; argv [i] != NULL ; ++i) + { + const char *const arg = argv [i]; + if (arg [0] == '-') + { + int j; + if (arg [1] == '\0') + { + File = stdin; + FileName = "stdin"; + } + else for (j = 1 ; arg [j] != '\0' ; ++j) switch (arg [j]) + { + case 'c': PrintClass = 1; break; + case 'r': PrintReferences = 1; break; + case 's': SelfReferences = 1; break; + case 'd': Debug = 1; break; + default: + fprintf (errout, "%s: unknown option: %c\n", argv [0], arg [1]); + fprintf (errout, Usage, argv [0]); + exit (1); + break; + } + } + else if (File != NULL) + { + fprintf (errout, Usage, argv [0]); + exit (1); + } + else + { + FileName = arg; + File = fopen (FileName, "r"); + if (File == NULL) + { + perror (argv [0]); + exit (1); + } + } + } + if (! PrintClass) + PrintReferences = 1; + if (File == NULL) + { + fprintf (errout, Usage, argv [0]); + exit (1); + } + else + { + findReferences (); + fclose (File); + } + return 0; +} + +#endif + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/third_party/ctags/entry.c b/third_party/ctags/entry.c new file mode 100644 index 000000000..246505097 --- /dev/null +++ b/third_party/ctags/entry.c @@ -0,0 +1,879 @@ +// clang-format off +/* +* $Id: entry.c 766 2010-09-11 18:59:45Z dhiebert $ +* +* Copyright (c) 1996-2002, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains functions for creating tag entries. 
+*/ + +/* +* INCLUDE FILES +*/ +#include "libc/runtime/runtime.h" +#include "third_party/ctags/general.h" /* must always come first */ + +#include "libc/mem/alg.h" +#include "libc/str/str.h" +#include "libc/str/str.h" /* to define isspace () */ +#include "libc/errno.h" + +#if defined (HAVE_SYS_TYPES_H) +#include "libc/calls/makedev.h" +#include "libc/calls/weirdtypes.h" +#include "libc/thread/thread.h" +#include "libc/calls/typedef/u.h" +#include "libc/calls/weirdtypes.h" +#include "libc/intrin/newbie.h" +#include "libc/sock/select.h" +#include "libc/sysv/consts/endian.h" /* to declare off_t on some hosts */ +#endif +#if defined (HAVE_TYPES_H) +// MISSING #include /* to declare off_t on some hosts */ +#endif +#if defined (HAVE_UNISTD_H) +#include "libc/calls/calls.h" +#include "libc/calls/weirdtypes.h" +#include "libc/runtime/pathconf.h" +#include "libc/runtime/runtime.h" +#include "libc/runtime/sysconf.h" +#include "libc/sysv/consts/f.h" +#include "libc/sysv/consts/fileno.h" +#include "libc/sysv/consts/o.h" +#include "libc/sysv/consts/ok.h" +#include "libc/time/time.h" +#include "third_party/getopt/getopt.h" +#include "third_party/musl/crypt.h" +#include "third_party/musl/lockf.h" /* to declare close (), ftruncate (), truncate () */ +#endif + +/* These header files provide for the functions necessary to do file + * truncation. 
+ */ +#ifdef HAVE_FCNTL_H +#include "libc/calls/calls.h" +#include "libc/calls/struct/flock.h" +#include "libc/calls/weirdtypes.h" +#include "libc/sysv/consts/at.h" +#include "libc/sysv/consts/f.h" +#include "libc/sysv/consts/fd.h" +#include "libc/sysv/consts/o.h" +#include "libc/sysv/consts/posix.h" +#include "libc/sysv/consts/s.h" +#endif +#ifdef HAVE_IO_H +// MISSING #include +#endif + +#include "third_party/ctags/debug.h" +#include "third_party/ctags/ctags.h" +#include "third_party/ctags/entry.h" +#include "third_party/ctags/main.h" +#include "third_party/ctags/options.h" +#include "third_party/ctags/read.h" +#include "third_party/ctags/routines.h" +#include "third_party/ctags/sort.h" +#include "third_party/ctags/strlist.h" + +/* +* MACROS +*/ +#define PSEUDO_TAG_PREFIX "!_" + +#define includeExtensionFlags() (Option.tagFileFormat > 1) + +/* + * Portability defines + */ +#if !defined(HAVE_TRUNCATE) && !defined(HAVE_FTRUNCATE) && !defined(HAVE_CHSIZE) +# define USE_REPLACEMENT_TRUNCATE +#endif + +/* Hack for rediculous practice of Microsoft Visual C++. 
+ */ +#if defined (WIN32) && defined (_MSC_VER) +# define chsize _chsize +# define open _open +# define close _close +# define O_RDWR _O_RDWR +#endif + +/* +* DATA DEFINITIONS +*/ + +tagFile TagFile = { + NULL, /* tag file name */ + NULL, /* tag file directory (absolute) */ + NULL, /* file pointer */ + { 0, 0 }, /* numTags */ + { 0, 0, 0 }, /* max */ + { NULL, NULL, 0 }, /* etags */ + NULL /* vLine */ +}; + +static boolean TagsToStdout = FALSE; + +/* +* FUNCTION PROTOTYPES +*/ +#ifdef NEED_PROTO_TRUNCATE +extern int truncate (const char *path, off_t length); +#endif + +#ifdef NEED_PROTO_FTRUNCATE +extern int ftruncate (int fd, off_t length); +#endif + +/* +* FUNCTION DEFINITIONS +*/ + +extern void freeTagFileResources (void) +{ + if (TagFile.directory != NULL) + eFree (TagFile.directory); + vStringDelete (TagFile.vLine); +} + +extern const char *tagFileName (void) +{ + return TagFile.name; +} + +/* +* Pseudo tag support +*/ + +static void rememberMaxLengths (const size_t nameLength, const size_t lineLength) +{ + if (nameLength > TagFile.max.tag) + TagFile.max.tag = nameLength; + + if (lineLength > TagFile.max.line) + TagFile.max.line = lineLength; +} + +static void writePseudoTag ( + const char *const tagName, + const char *const fileName, + const char *const pattern) +{ + const int length = fprintf ( + TagFile.fp, "%s%s\t%s\t/%s/\n", + PSEUDO_TAG_PREFIX, tagName, fileName, pattern); + ++TagFile.numTags.added; + rememberMaxLengths (strlen (tagName), (size_t) length); +} + +static void addPseudoTags (void) +{ + if (! 
Option.xref) + { + char format [11]; + const char *formatComment = "unknown format"; + + sprintf (format, "%u", Option.tagFileFormat); + + if (Option.tagFileFormat == 1) + formatComment = "original ctags format"; + else if (Option.tagFileFormat == 2) + formatComment = + "extended format; --format=1 will not append ;\" to lines"; + + writePseudoTag ("TAG_FILE_FORMAT", format, formatComment); + writePseudoTag ("TAG_FILE_SORTED", + Option.sorted == SO_FOLDSORTED ? "2" : + (Option.sorted == SO_SORTED ? "1" : "0"), + "0=unsorted, 1=sorted, 2=foldcase"); + writePseudoTag ("TAG_PROGRAM_AUTHOR", AUTHOR_NAME, AUTHOR_EMAIL); + writePseudoTag ("TAG_PROGRAM_NAME", PROGRAM_NAME, ""); + writePseudoTag ("TAG_PROGRAM_URL", PROGRAM_URL, "official site"); + writePseudoTag ("TAG_PROGRAM_VERSION", PROGRAM_VERSION, ""); + } +} + +static void updateSortedFlag ( + const char *const line, FILE *const fp, fpos_t startOfLine) +{ + const char *const tab = strchr (line, '\t'); + + if (tab != NULL) + { + const long boolOffset = tab - line + 1; /* where it should be */ + + if (line [boolOffset] == '0' || line [boolOffset] == '1') + { + fpos_t nextLine; + + if (fgetpos (fp, &nextLine) == -1 || fsetpos (fp, &startOfLine) == -1) + error (WARNING, "Failed to update 'sorted' pseudo-tag"); + else + { + fpos_t flagLocation; + int c, d; + + do + c = fgetc (fp); + while (c != '\t' && c != '\n'); + fgetpos (fp, &flagLocation); + d = fgetc (fp); + if (c == '\t' && (d == '0' || d == '1') && + d != (int) Option.sorted) + { + fsetpos (fp, &flagLocation); + fputc (Option.sorted == SO_FOLDSORTED ? '2' : + (Option.sorted == SO_SORTED ? '1' : '0'), fp); + } + fsetpos (fp, &nextLine); + } + } + } +} + +/* Look through all line beginning with "!_TAG_FILE", and update those which + * require it. 
+ */ +static long unsigned int updatePseudoTags (FILE *const fp) +{ + enum { maxEntryLength = 20 }; + char entry [maxEntryLength + 1]; + unsigned long linesRead = 0; + fpos_t startOfLine; + size_t entryLength; + const char *line; + + sprintf (entry, "%sTAG_FILE", PSEUDO_TAG_PREFIX); + entryLength = strlen (entry); + Assert (entryLength < maxEntryLength); + + fgetpos (fp, &startOfLine); + line = readLine (TagFile.vLine, fp); + while (line != NULL && line [0] == entry [0]) + { + ++linesRead; + if (strncmp (line, entry, entryLength) == 0) + { + char tab, classType [16]; + + if (sscanf (line + entryLength, "%15s%c", classType, &tab) == 2 && + tab == '\t') + { + if (strcmp (classType, "_SORTED") == 0) + updateSortedFlag (line, fp, startOfLine); + } + fgetpos (fp, &startOfLine); + } + line = readLine (TagFile.vLine, fp); + } + while (line != NULL) /* skip to end of file */ + { + ++linesRead; + line = readLine (TagFile.vLine, fp); + } + return linesRead; +} + +/* + * Tag file management + */ + +static boolean isValidTagAddress (const char *const excmd) +{ + boolean isValid = FALSE; + + if (strchr ("/?", excmd [0]) != NULL) + isValid = TRUE; + else + { + char *address = xMalloc (strlen (excmd) + 1, char); + if (sscanf (excmd, "%[^;\n]", address) == 1 && + strspn (address,"0123456789") == strlen (address)) + isValid = TRUE; + eFree (address); + } + return isValid; +} + +static boolean isCtagsLine (const char *const line) +{ + enum fieldList { TAG, TAB1, SRC_FILE, TAB2, EXCMD, NUM_FIELDS }; + boolean ok = FALSE; /* we assume not unless confirmed */ + const size_t fieldLength = strlen (line) + 1; + char *const fields = xMalloc (NUM_FIELDS * fieldLength, char); + + if (fields == NULL) + error (FATAL, "Cannot analyze tag file"); + else + { +#define field(x) (fields + ((size_t) (x) * fieldLength)) + + const int numFields = sscanf ( + line, "%[^\t]%[\t]%[^\t]%[\t]%[^\r\n]", + field (TAG), field (TAB1), field (SRC_FILE), + field (TAB2), field (EXCMD)); + + /* There must be exactly 
five fields: two tab fields containing + * exactly one tab each, the tag must not begin with "#", and the + * file name should not end with ";", and the excmd must be + * accceptable. + * + * These conditions will reject tag-looking lines like: + * int a; + * #define LABEL + */ + if (numFields == NUM_FIELDS && + strlen (field (TAB1)) == 1 && + strlen (field (TAB2)) == 1 && + field (TAG) [0] != '#' && + field (SRC_FILE) [strlen (field (SRC_FILE)) - 1] != ';' && + isValidTagAddress (field (EXCMD))) + ok = TRUE; + + eFree (fields); + } + return ok; +} + +static boolean isEtagsLine (const char *const line) +{ + boolean result = FALSE; + if (line [0] == '\f') + result = (boolean) (line [1] == '\n' || line [1] == '\r'); + return result; +} + +static boolean isTagFile (const char *const filename) +{ + boolean ok = FALSE; /* we assume not unless confirmed */ + FILE *const fp = fopen (filename, "rb"); + + if (fp == NULL && errno == ENOENT) + ok = TRUE; + else if (fp != NULL) + { + const char *line = readLine (TagFile.vLine, fp); + + if (line == NULL) + ok = TRUE; + else + ok = (boolean) (isCtagsLine (line) || isEtagsLine (line)); + fclose (fp); + } + return ok; +} + +extern void copyBytes (FILE* const fromFp, FILE* const toFp, const long size) +{ + enum { BufferSize = 1000 }; + long toRead, numRead; + char* buffer = xMalloc (BufferSize, char); + long remaining = size; + do + { + toRead = (0 < remaining && remaining < BufferSize) ? 
+ remaining : (long) BufferSize; + numRead = fread (buffer, (size_t) 1, (size_t) toRead, fromFp); + if (fwrite (buffer, (size_t)1, (size_t)numRead, toFp) < (size_t)numRead) + error (FATAL | PERROR, "cannot complete write"); + if (remaining > 0) + remaining -= numRead; + } while (numRead == toRead && remaining != 0); + eFree (buffer); +} + +extern void copyFile (const char *const from, const char *const to, const long size) +{ + FILE* const fromFp = fopen (from, "rb"); + if (fromFp == NULL) + error (FATAL | PERROR, "cannot open file to copy"); + else + { + FILE* const toFp = fopen (to, "wb"); + if (toFp == NULL) + error (FATAL | PERROR, "cannot open copy destination"); + else + { + copyBytes (fromFp, toFp, size); + fclose (toFp); + } + fclose (fromFp); + } +} + +extern void openTagFile (void) +{ + setDefaultTagFileName (); + TagsToStdout = isDestinationStdout (); + + if (TagFile.vLine == NULL) + TagFile.vLine = vStringNew (); + + /* Open the tags file. + */ + if (TagsToStdout) + TagFile.fp = tempFile ("w", &TagFile.name); + else + { + boolean fileExists; + + setDefaultTagFileName (); + TagFile.name = eStrdup (Option.tagFileName); + fileExists = doesFileExist (TagFile.name); + if (fileExists && ! 
isTagFile (TagFile.name)) + error (FATAL, + "\"%s\" doesn't look like a tag file; I refuse to overwrite it.", + TagFile.name); + + if (Option.etags) + { + if (Option.append && fileExists) + TagFile.fp = fopen (TagFile.name, "a+b"); + else + TagFile.fp = fopen (TagFile.name, "w+b"); + } + else + { + if (Option.append && fileExists) + { + TagFile.fp = fopen (TagFile.name, "r+"); + if (TagFile.fp != NULL) + { + TagFile.numTags.prev = updatePseudoTags (TagFile.fp); + fclose (TagFile.fp); + TagFile.fp = fopen (TagFile.name, "a+"); + } + } + else + { + TagFile.fp = fopen (TagFile.name, "w"); + if (TagFile.fp != NULL) + addPseudoTags (); + } + } + if (TagFile.fp == NULL) + { + error (FATAL | PERROR, "cannot open tag file"); + exit (1); + } + } + if (TagsToStdout) + TagFile.directory = eStrdup (CurrentDirectory); + else + TagFile.directory = absoluteDirname (TagFile.name); +} + +#ifdef USE_REPLACEMENT_TRUNCATE + +/* Replacement for missing library function. + */ +static int replacementTruncate (const char *const name, const long size) +{ + char *tempName = NULL; + FILE *fp = tempFile ("w", &tempName); + fclose (fp); + copyFile (name, tempName, size); + copyFile (tempName, name, WHOLE_FILE); + remove (tempName); + eFree (tempName); + + return 0; +} + +#endif + +static void sortTagFile (void) +{ + if (TagFile.numTags.added > 0L) + { + if (Option.sorted != SO_UNSORTED) + { + verbose ("sorting tag file\n"); +#ifdef EXTERNAL_SORT + externalSortTags (TagsToStdout); +#else + internalSortTags (TagsToStdout); +#endif + } + else if (TagsToStdout) + catFile (tagFileName ()); + } + if (TagsToStdout) + remove (tagFileName ()); /* remove temporary file */ +} + +static void resizeTagFile (const long newSize) +{ + int result; + +#ifdef USE_REPLACEMENT_TRUNCATE + result = replacementTruncate (TagFile.name, newSize); +#else +# ifdef HAVE_TRUNCATE + result = truncate (TagFile.name, (off_t) newSize); +# else + const int fd = open (TagFile.name, O_RDWR); + + if (fd == -1) + result = -1; + else 
+ { +# ifdef HAVE_FTRUNCATE + result = ftruncate (fd, (off_t) newSize); +# else +# ifdef HAVE_CHSIZE + result = chsize (fd, newSize); +# endif +# endif + close (fd); + } +# endif +#endif + if (result == -1) + fprintf (errout, "Cannot shorten tag file: errno = %d\n", errno); +} + +static void writeEtagsIncludes (FILE *const fp) +{ + if (Option.etagsInclude) + { + unsigned int i; + for (i = 0 ; i < stringListCount (Option.etagsInclude) ; ++i) + { + vString *item = stringListItem (Option.etagsInclude, i); + fprintf (fp, "\f\n%s,include\n", vStringValue (item)); + } + } +} + +extern void closeTagFile (const boolean resize) +{ + long desiredSize, size; + + if (Option.etags) + writeEtagsIncludes (TagFile.fp); + desiredSize = ftell (TagFile.fp); + fseek (TagFile.fp, 0L, SEEK_END); + size = ftell (TagFile.fp); + fclose (TagFile.fp); + if (resize && desiredSize < size) + { + DebugStatement ( + debugPrintf (DEBUG_STATUS, "shrinking %s from %ld to %ld bytes\n", + TagFile.name, size, desiredSize); ) + resizeTagFile (desiredSize); + } + sortTagFile (); + eFree (TagFile.name); + TagFile.name = NULL; +} + +extern void beginEtagsFile (void) +{ + TagFile.etags.fp = tempFile ("w+b", &TagFile.etags.name); + TagFile.etags.byteCount = 0; +} + +extern void endEtagsFile (const char *const name) +{ + const char *line; + + fprintf (TagFile.fp, "\f\n%s,%ld\n", name, (long) TagFile.etags.byteCount); + if (TagFile.etags.fp != NULL) + { + rewind (TagFile.etags.fp); + while ((line = readLine (TagFile.vLine, TagFile.etags.fp)) != NULL) + fputs (line, TagFile.fp); + fclose (TagFile.etags.fp); + remove (TagFile.etags.name); + eFree (TagFile.etags.name); + TagFile.etags.fp = NULL; + TagFile.etags.name = NULL; + } +} + +/* + * Tag entry management + */ + +/* This function copies the current line out to a specified file. It has no + * effect on the fileGetc () function. During copying, any '\' characters + * are doubled and a leading '^' or trailing '$' is also quoted. 
End of line + * characters (line feed or carriage return) are dropped. + */ +static size_t writeSourceLine (FILE *const fp, const char *const line) +{ + size_t length = 0; + const char *p; + + /* Write everything up to, but not including, a line end character. + */ + for (p = line ; *p != '\0' ; ++p) + { + const int next = *(p + 1); + const int c = *p; + + if (c == CRETURN || c == NEWLINE) + break; + + /* If character is '\', or a terminal '$', then quote it. + */ + if (c == BACKSLASH || c == (Option.backward ? '?' : '/') || + (c == '$' && (next == NEWLINE || next == CRETURN))) + { + putc (BACKSLASH, fp); + ++length; + } + putc (c, fp); + ++length; + } + return length; +} + +/* Writes "line", stripping leading and duplicate white space. + */ +static size_t writeCompactSourceLine (FILE *const fp, const char *const line) +{ + boolean lineStarted = FALSE; + size_t length = 0; + const char *p; + int c; + + /* Write everything up to, but not including, the newline. + */ + for (p = line, c = *p ; c != NEWLINE && c != '\0' ; c = *++p) + { + if (lineStarted || ! isspace (c)) /* ignore leading spaces */ + { + lineStarted = TRUE; + if (isspace (c)) + { + int next; + + /* Consume repeating white space. 
+ */ + while (next = *(p+1) , isspace (next) && next != NEWLINE) + ++p; + c = ' '; /* force space character for any white space */ + } + if (c != CRETURN || *(p + 1) != NEWLINE) + { + putc (c, fp); + ++length; + } + } + } + return length; +} + +static int writeXrefEntry (const tagEntryInfo *const tag) +{ + const char *const line = + readSourceLine (TagFile.vLine, tag->filePosition, NULL); + int length; + + if (Option.tagFileFormat == 1) + length = fprintf (TagFile.fp, "%-16s %4lu %-16s ", tag->name, + tag->lineNumber, tag->sourceFileName); + else + length = fprintf (TagFile.fp, "%-16s %-10s %4lu %-16s ", tag->name, + tag->kindName, tag->lineNumber, tag->sourceFileName); + + length += writeCompactSourceLine (TagFile.fp, line); + putc (NEWLINE, TagFile.fp); + ++length; + + return length; +} + +/* Truncates the text line containing the tag at the character following the + * tag, providing a character which designates the end of the tag. + */ +static void truncateTagLine ( + char *const line, const char *const token, const boolean discardNewline) +{ + char *p = strstr (line, token); + + if (p != NULL) + { + p += strlen (token); + if (*p != '\0' && ! (*p == '\n' && discardNewline)) + ++p; /* skip past character terminating character */ + *p = '\0'; + } +} + +static int writeEtagsEntry (const tagEntryInfo *const tag) +{ + int length; + + if (tag->isFileEntry) + length = fprintf (TagFile.etags.fp, "\177%s\001%lu,0\n", + tag->name, tag->lineNumber); + else + { + long seekValue; + char *const line = + readSourceLine (TagFile.vLine, tag->filePosition, &seekValue); + + if (tag->truncateLine) + truncateTagLine (line, tag->name, TRUE); + else + line [strlen (line) - 1] = '\0'; + + length = fprintf (TagFile.etags.fp, "%s\177%s\001%lu,%ld\n", line, + tag->name, tag->lineNumber, seekValue); + } + TagFile.etags.byteCount += length; + + return length; +} + +static int addExtensionFields (const tagEntryInfo *const tag) +{ + const char* const kindKey = Option.extensionFields.kindKey ? 
"kind:" : ""; + boolean first = TRUE; + const char* separator = ";\""; + const char* const empty = ""; + int length = 0; +/* "sep" returns a value only the first time it is evaluated */ +#define sep (first ? (first = FALSE, separator) : empty) + + if (tag->kindName != NULL && (Option.extensionFields.kindLong || + (Option.extensionFields.kind && tag->kind == '\0'))) + length += fprintf (TagFile.fp,"%s\t%s%s", sep, kindKey, tag->kindName); + else if (tag->kind != '\0' && (Option.extensionFields.kind || + (Option.extensionFields.kindLong && tag->kindName == NULL))) + length += fprintf (TagFile.fp, "%s\t%s%c", sep, kindKey, tag->kind); + + if (Option.extensionFields.lineNumber) + length += fprintf (TagFile.fp, "%s\tline:%ld", sep, tag->lineNumber); + + if (Option.extensionFields.language && tag->language != NULL) + length += fprintf (TagFile.fp, "%s\tlanguage:%s", sep, tag->language); + + if (Option.extensionFields.scope && + tag->extensionFields.scope [0] != NULL && + tag->extensionFields.scope [1] != NULL) + length += fprintf (TagFile.fp, "%s\t%s:%s", sep, + tag->extensionFields.scope [0], + tag->extensionFields.scope [1]); + + if (Option.extensionFields.typeRef && + tag->extensionFields.typeRef [0] != NULL && + tag->extensionFields.typeRef [1] != NULL) + length += fprintf (TagFile.fp, "%s\ttyperef:%s:%s", sep, + tag->extensionFields.typeRef [0], + tag->extensionFields.typeRef [1]); + + if (Option.extensionFields.fileScope && tag->isFileScope) + length += fprintf (TagFile.fp, "%s\tfile:", sep); + + if (Option.extensionFields.inheritance && + tag->extensionFields.inheritance != NULL) + length += fprintf (TagFile.fp, "%s\tinherits:%s", sep, + tag->extensionFields.inheritance); + + if (Option.extensionFields.access && tag->extensionFields.access != NULL) + length += fprintf (TagFile.fp, "%s\taccess:%s", sep, + tag->extensionFields.access); + + if (Option.extensionFields.implementation && + tag->extensionFields.implementation != NULL) + length += fprintf (TagFile.fp, 
"%s\timplementation:%s", sep, + tag->extensionFields.implementation); + + if (Option.extensionFields.signature && + tag->extensionFields.signature != NULL) + length += fprintf (TagFile.fp, "%s\tsignature:%s", sep, + tag->extensionFields.signature); + + return length; +#undef sep +} + +static int writePatternEntry (const tagEntryInfo *const tag) +{ + char *const line = readSourceLine (TagFile.vLine, tag->filePosition, NULL); + const int searchChar = Option.backward ? '?' : '/'; + boolean newlineTerminated; + int length = 0; + + if (line == NULL) + error (FATAL, "bad tag in %s", vStringValue (File.name)); + if (tag->truncateLine) + truncateTagLine (line, tag->name, FALSE); + newlineTerminated = (boolean) (line [strlen (line) - 1] == '\n'); + + length += fprintf (TagFile.fp, "%c^", searchChar); + length += writeSourceLine (TagFile.fp, line); + length += fprintf (TagFile.fp, "%s%c", newlineTerminated ? "$":"", searchChar); + + return length; +} + +static int writeLineNumberEntry (const tagEntryInfo *const tag) +{ + return fprintf (TagFile.fp, "%lu", tag->lineNumber); +} + +static int writeCtagsEntry (const tagEntryInfo *const tag) +{ + int length = fprintf (TagFile.fp, "%s\t%s\t", + tag->name, tag->sourceFileName); + + if (tag->lineNumberEntry) + length += writeLineNumberEntry (tag); + else + length += writePatternEntry (tag); + + if (includeExtensionFlags ()) + length += addExtensionFields (tag); + + length += fprintf (TagFile.fp, "\n"); + + return length; +} + +extern void makeTagEntry (const tagEntryInfo *const tag) +{ + Assert (tag->name != NULL); + if (tag->name [0] == '\0') + error (WARNING, "ignoring null tag in %s", vStringValue (File.name)); + else + { + int length = 0; + + DebugStatement ( debugEntry (tag); ) + if (Option.xref) + { + if (! 
tag->isFileEntry) + length = writeXrefEntry (tag); + } + else if (Option.etags) + length = writeEtagsEntry (tag); + else + length = writeCtagsEntry (tag); + + ++TagFile.numTags.added; + rememberMaxLengths (strlen (tag->name), (size_t) length); + DebugStatement ( fflush (TagFile.fp); ) + } +} + +extern void initTagEntry (tagEntryInfo *const e, const char *const name) +{ + Assert (File.source.name != NULL); + memset (e, 0, sizeof (tagEntryInfo)); + e->lineNumberEntry = (boolean) (Option.locate == EX_LINENUM); + e->lineNumber = getSourceLineNumber (); + e->language = getSourceLanguageName (); + e->filePosition = getInputFilePosition (); + e->sourceFileName = getSourceFileTagPath (); + e->name = name; +} + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/third_party/ctags/entry.h b/third_party/ctags/entry.h new file mode 100644 index 000000000..ca43387ae --- /dev/null +++ b/third_party/ctags/entry.h @@ -0,0 +1,111 @@ +// clang-format off +/* +* $Id: entry.h 443 2006-05-30 04:37:13Z darren $ +* +* Copyright (c) 1998-2002, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* External interface to entry.c +*/ +#ifndef _ENTRY_H +#define _ENTRY_H + +/* +* INCLUDE FILES +*/ +#include "third_party/ctags/general.h" /* must always come first */ + +#include "libc/calls/calls.h" +#include "libc/calls/dprintf.h" +#include "libc/calls/weirdtypes.h" +#include "libc/fmt/fmt.h" +#include "libc/mem/fmt.h" +#include "libc/stdio/stdio.h" +#include "libc/stdio/temp.h" +#include "third_party/musl/tempnam.h" + +#include "third_party/ctags/vstring.h" + +/* +* MACROS +*/ +#define WHOLE_FILE -1L + +/* +* DATA DECLARATIONS +*/ + +/* Maintains the state of the tag file. 
+ */ +typedef struct eTagFile { + char *name; + char *directory; + FILE *fp; + struct sNumTags { unsigned long added, prev; } numTags; + struct sMax { size_t line, tag, file; } max; + struct sEtags { + char *name; + FILE *fp; + size_t byteCount; + } etags; + vString *vLine; +} tagFile; + +typedef struct sTagFields { + unsigned int count; /* number of additional extension flags */ + const char *const *label; /* list of labels for extension flags */ + const char *const *value; /* list of values for extension flags */ +} tagFields; + +/* Information about the current tag candidate. + */ +typedef struct sTagEntryInfo { + boolean lineNumberEntry; /* pattern or line number entry */ + unsigned long lineNumber; /* line number of tag */ + fpos_t filePosition; /* file position of line containing tag */ + const char* language; /* language of source file */ + boolean isFileScope; /* is tag visibile only within source file? */ + boolean isFileEntry; /* is this just an entry for a file name? */ + boolean truncateLine; /* truncate tag line at end of tag name? */ + const char *sourceFileName; /* name of source file */ + const char *name; /* name of the tag */ + const char *kindName; /* kind of tag */ + char kind; /* single character representation of kind */ + struct { + const char* access; + const char* fileScope; + const char* implementation; + const char* inheritance; + const char* scope [2]; /* value and key */ + const char* signature; + + /* type (union/struct/etc.) and name for a variable or typedef. 
*/ + const char* typeRef [2]; /* e.g., "struct" and struct name */ + + } extensionFields; /* list of extension fields*/ +} tagEntryInfo; + +/* +* GLOBAL VARIABLES +*/ +extern tagFile TagFile; + +/* +* FUNCTION PROTOTYPES +*/ +extern void freeTagFileResources (void); +extern const char *tagFileName (void); +extern void copyBytes (FILE* const fromFp, FILE* const toFp, const long size); +extern void copyFile (const char *const from, const char *const to, const long size); +extern void openTagFile (void); +extern void closeTagFile (const boolean resize); +extern void beginEtagsFile (void); +extern void endEtagsFile (const char *const name); +extern void makeTagEntry (const tagEntryInfo *const tag); +extern void initTagEntry (tagEntryInfo *const e, const char *const name); + +#endif /* _ENTRY_H */ + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/third_party/ctags/erlang.c b/third_party/ctags/erlang.c new file mode 100644 index 000000000..768aabd85 --- /dev/null +++ b/third_party/ctags/erlang.c @@ -0,0 +1,191 @@ +// clang-format off +/* +* $Id: erlang.c 443 2006-05-30 04:37:13Z darren $ +* +* Copyright (c) 2003, Brent Fulgham +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains functions for generating tags for Erlang language +* files. 
Some of the parsing constructs are based on the Emacs 'etags' +* program by Francesco Potori +*/ +/* +* INCLUDE FILES +*/ +#include "third_party/ctags/general.h" /* must always come first */ + +#include "libc/mem/alg.h" +#include "libc/str/str.h" + +#include "third_party/ctags/entry.h" +#include "third_party/ctags/options.h" +#include "third_party/ctags/read.h" +#include "third_party/ctags/routines.h" +#include "third_party/ctags/vstring.h" + +/* +* DATA DEFINITIONS +*/ +typedef enum { + K_MACRO, K_FUNCTION, K_MODULE, K_RECORD +} erlangKind; + +static kindOption ErlangKinds[] = { + {TRUE, 'd', "macro", "macro definitions"}, + {TRUE, 'f', "function", "functions"}, + {TRUE, 'm', "module", "modules"}, + {TRUE, 'r', "record", "record definitions"}, +}; + +/* +* FUNCTION DEFINITIONS +*/ +/* tagEntryInfo and vString should be preinitialized/preallocated but not + * necessary. If successful you will find class name in vString + */ + +static boolean isIdentifierFirstCharacter (int c) +{ + return (boolean) (isalpha (c)); +} + +static boolean isIdentifierCharacter (int c) +{ + return (boolean) (isalnum (c) || c == '_' || c == ':'); +} + +static const unsigned char *skipSpace (const unsigned char *cp) +{ + while (isspace ((int) *cp)) + ++cp; + return cp; +} + +static const unsigned char *parseIdentifier ( + const unsigned char *cp, vString *const identifier) +{ + vStringClear (identifier); + while (isIdentifierCharacter ((int) *cp)) + { + vStringPut (identifier, (int) *cp); + ++cp; + } + vStringTerminate (identifier); + return cp; +} + +static void makeMemberTag ( + vString *const identifier, erlangKind kind, vString *const module) +{ + if (ErlangKinds [kind].enabled && vStringLength (identifier) > 0) + { + tagEntryInfo tag; + initTagEntry (&tag, vStringValue (identifier)); + tag.kindName = ErlangKinds[kind].name; + tag.kind = ErlangKinds[kind].letter; + + if (module != NULL && vStringLength (module) > 0) + { + tag.extensionFields.scope [0] = "module"; + 
tag.extensionFields.scope [1] = vStringValue (module); + } + makeTagEntry (&tag); + } +} + +static void parseModuleTag (const unsigned char *cp, vString *const module) +{ + vString *const identifier = vStringNew (); + parseIdentifier (cp, identifier); + makeSimpleTag (identifier, ErlangKinds, K_MODULE); + + /* All further entries go in the new module */ + vStringCopy (module, identifier); + vStringDelete (identifier); +} + +static void parseSimpleTag (const unsigned char *cp, erlangKind kind) +{ + vString *const identifier = vStringNew (); + parseIdentifier (cp, identifier); + makeSimpleTag (identifier, ErlangKinds, kind); + vStringDelete (identifier); +} + +static void parseFunctionTag (const unsigned char *cp, vString *const module) +{ + vString *const identifier = vStringNew (); + parseIdentifier (cp, identifier); + makeMemberTag (identifier, K_FUNCTION, module); + vStringDelete (identifier); +} + +/* + * Directives are of the form: + * -module(foo) + * -define(foo, bar) + * -record(graph, {vtab = notable, cyclic = true}). + */ +static void parseDirective (const unsigned char *cp, vString *const module) +{ + /* + * A directive will be either a record definition or a directive. + * Record definitions are handled separately + */ + vString *const directive = vStringNew (); + const char *const drtv = vStringValue (directive); + cp = parseIdentifier (cp, directive); + cp = skipSpace (cp); + if (*cp == '(') + ++cp; + + if (strcmp (drtv, "record") == 0) + parseSimpleTag (cp, K_RECORD); + else if (strcmp (drtv, "define") == 0) + parseSimpleTag (cp, K_MACRO); + else if (strcmp (drtv, "module") == 0) + parseModuleTag (cp, module); + /* Otherwise, it was an import, export, etc. 
*/ + + vStringDelete (directive); +} + +static void findErlangTags (void) +{ + vString *const module = vStringNew (); + const unsigned char *line; + + while ((line = fileReadLine ()) != NULL) + { + const unsigned char *cp = line; + + if (*cp == '%') /* skip initial comment */ + continue; + if (*cp == '"') /* strings sometimes start in column one */ + continue; + + if ( *cp == '-') + { + ++cp; /* Move off of the '-' */ + parseDirective(cp, module); + } + else if (isIdentifierFirstCharacter ((int) *cp)) + parseFunctionTag (cp, module); + } + vStringDelete (module); +} + +extern parserDefinition *ErlangParser (void) +{ + static const char *const extensions[] = { "erl", "ERL", "hrl", "HRL", NULL }; + parserDefinition *def = parserNew ("Erlang"); + def->kinds = ErlangKinds; + def->kindCount = KIND_COUNT (ErlangKinds); + def->extensions = extensions; + def->parser = findErlangTags; + return def; +} + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/third_party/ctags/flex.c b/third_party/ctags/flex.c new file mode 100644 index 000000000..625962a84 --- /dev/null +++ b/third_party/ctags/flex.c @@ -0,0 +1,2431 @@ +// clang-format off +/* + * $Id: flex.c 666 2008-05-15 17:47:31Z dfishburn $ + * + * Copyright (c) 2008, David Fishburn + * + * This source code is released for free distribution under the terms of the + * GNU General Public License. + * + * This module contains functions for generating tags for Adobe languages. 
+ * There are a number of different ones, but this will begin with: + * Flex + * MXML files (*.mMacromedia XML) + * ActionScript files (*.as) + * + * Flex 3 language reference + * http://livedocs.adobe.com/flex/3/langref/index.html + */ + +/* + * INCLUDE FILES + */ +#include "third_party/ctags/general.h" /* must always come first */ +#include "libc/str/str.h" /* to define isalpha () */ +#include "libc/runtime/runtime.h" +#ifdef DEBUG +#include "libc/calls/calls.h" +#include "libc/calls/dprintf.h" +#include "libc/calls/weirdtypes.h" +#include "libc/fmt/fmt.h" +#include "libc/mem/fmt.h" +#include "libc/stdio/stdio.h" +#include "libc/stdio/temp.h" +#include "third_party/musl/tempnam.h" +#endif + +#include "third_party/ctags/debug.h" +#include "third_party/ctags/entry.h" +#include "third_party/ctags/keyword.h" +#include "third_party/ctags/parse.h" +#include "third_party/ctags/read.h" +#include "third_party/ctags/routines.h" +#include "third_party/ctags/vstring.h" + +/* + * MACROS + */ +#define isType(token,t) (boolean) ((token)->type == (t)) +#define isKeyword(token,k) (boolean) ((token)->keyword == (k)) + +/* + * DATA DECLARATIONS + */ + +typedef enum eException { ExceptionNone, ExceptionEOF } exception_t; + +/* + * Tracks class and function names already created + */ +static stringList *ClassNames; +static stringList *FunctionNames; + +/* Used to specify type of keyword. 
+*/ +typedef enum eKeywordId { + KEYWORD_NONE = -1, + KEYWORD_function, + KEYWORD_capital_function, + KEYWORD_object, + KEYWORD_capital_object, + KEYWORD_prototype, + KEYWORD_var, + KEYWORD_new, + KEYWORD_this, + KEYWORD_for, + KEYWORD_while, + KEYWORD_do, + KEYWORD_if, + KEYWORD_else, + KEYWORD_switch, + KEYWORD_try, + KEYWORD_catch, + KEYWORD_finally, + KEYWORD_public, + KEYWORD_private, + KEYWORD_static, + KEYWORD_class, + KEYWORD_id, + KEYWORD_name, + KEYWORD_script, + KEYWORD_cdata, + KEYWORD_mx, + KEYWORD_fx, + KEYWORD_override +} keywordId; + +/* Used to determine whether keyword is valid for the token language and + * what its ID is. + */ +typedef struct sKeywordDesc { + const char *name; + keywordId id; +} keywordDesc; + +typedef enum eTokenType { + TOKEN_UNDEFINED, + TOKEN_CHARACTER, + TOKEN_CLOSE_PAREN, + TOKEN_SEMICOLON, + TOKEN_COLON, + TOKEN_COMMA, + TOKEN_KEYWORD, + TOKEN_OPEN_PAREN, + TOKEN_OPERATOR, + TOKEN_IDENTIFIER, + TOKEN_STRING, + TOKEN_PERIOD, + TOKEN_OPEN_CURLY, + TOKEN_CLOSE_CURLY, + TOKEN_EQUAL_SIGN, + TOKEN_EXCLAMATION, + TOKEN_FORWARD_SLASH, + TOKEN_OPEN_SQUARE, + TOKEN_CLOSE_SQUARE, + TOKEN_OPEN_MXML, + TOKEN_CLOSE_MXML, + TOKEN_CLOSE_SGML, + TOKEN_LESS_THAN, + TOKEN_GREATER_THAN, + TOKEN_QUESTION_MARK, + TOKEN_OPEN_NAMESPACE +} tokenType; + +typedef struct sTokenInfo { + tokenType type; + keywordId keyword; + vString * string; + vString * scope; + unsigned long lineNumber; + fpos_t filePosition; + int nestLevel; + boolean ignoreTag; + boolean isClass; +} tokenInfo; + +/* + * DATA DEFINITIONS + */ + +static langType Lang_js; + +static jmp_buf Exception; + +typedef enum { + FLEXTAG_FUNCTION, + FLEXTAG_CLASS, + FLEXTAG_METHOD, + FLEXTAG_PROPERTY, + FLEXTAG_VARIABLE, + FLEXTAG_MXTAG, + FLEXTAG_COUNT +} flexKind; + +static kindOption FlexKinds [] = { + { TRUE, 'f', "function", "functions" }, + { TRUE, 'c', "class", "classes" }, + { TRUE, 'm', "method", "methods" }, + { TRUE, 'p', "property", "properties" }, + { TRUE, 'v', "variable", 
"global variables" }, + { TRUE, 'x', "mxtag", "mxtags" } +}; + +static const keywordDesc FlexKeywordTable [] = { + /* keyword keyword ID */ + { "function", KEYWORD_function }, + { "Function", KEYWORD_capital_function }, + { "object", KEYWORD_object }, + { "Object", KEYWORD_capital_object }, + { "prototype", KEYWORD_prototype }, + { "var", KEYWORD_var }, + { "new", KEYWORD_new }, + { "this", KEYWORD_this }, + { "for", KEYWORD_for }, + { "while", KEYWORD_while }, + { "do", KEYWORD_do }, + { "if", KEYWORD_if }, + { "else", KEYWORD_else }, + { "switch", KEYWORD_switch }, + { "try", KEYWORD_try }, + { "catch", KEYWORD_catch }, + { "finally", KEYWORD_finally }, + { "public", KEYWORD_public }, + { "private", KEYWORD_private }, + { "static", KEYWORD_static }, + { "class", KEYWORD_class }, + { "id", KEYWORD_id }, + { "name", KEYWORD_name }, + { "script", KEYWORD_script }, + { "cdata", KEYWORD_cdata }, + { "mx", KEYWORD_mx }, + { "fx", KEYWORD_fx }, + { "override", KEYWORD_override } +}; + +/* + * FUNCTION DEFINITIONS + */ + +/* Recursive functions */ +static void parseFunction (tokenInfo *const token); +static boolean parseBlock (tokenInfo *const token, tokenInfo *const parent); +static boolean parseLine (tokenInfo *const token); +static boolean parseActionScript (tokenInfo *const token); +static boolean parseMXML (tokenInfo *const token); + +static boolean isIdentChar (const int c) +{ + return (boolean) + (isalpha (c) || isdigit (c) || c == '$' || + c == '@' || c == '_' || c == '#'); +} + +static void buildFlexKeywordHash (void) +{ + const size_t count = sizeof (FlexKeywordTable) / + sizeof (FlexKeywordTable [0]); + size_t i; + for (i = 0 ; i < count ; ++i) + { + const keywordDesc* const p = &FlexKeywordTable [i]; + addKeyword (p->name, Lang_js, (int) p->id); + } +} + +static tokenInfo *newToken (void) +{ + tokenInfo *const token = xMalloc (1, tokenInfo); + + token->type = TOKEN_UNDEFINED; + token->keyword = KEYWORD_NONE; + token->string = vStringNew (); + token->scope = 
vStringNew (); + token->nestLevel = 0; + token->isClass = FALSE; + token->ignoreTag = FALSE; + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + + return token; +} + +static void deleteToken (tokenInfo *const token) +{ + vStringDelete (token->string); + vStringDelete (token->scope); + eFree (token); +} + +/* + * Tag generation functions + */ + +static void makeConstTag (tokenInfo *const token, const flexKind kind) +{ + if (FlexKinds [kind].enabled && ! token->ignoreTag ) + { + const char *const name = vStringValue (token->string); + tagEntryInfo e; + initTagEntry (&e, name); + + e.lineNumber = token->lineNumber; + e.filePosition = token->filePosition; + e.kindName = FlexKinds [kind].name; + e.kind = FlexKinds [kind].letter; + + makeTagEntry (&e); + } +} + +static void makeFlexTag (tokenInfo *const token, flexKind kind) +{ + vString * fulltag; + + if (FlexKinds [kind].enabled && ! token->ignoreTag ) + { + DebugStatement ( + debugPrintf (DEBUG_PARSE + , "\n makeFlexTag start: token isClass:%d scope:%s name:%s\n" + , token->isClass + , vStringValue(token->scope) + , vStringValue(token->string) + ); + ); + if (kind == FLEXTAG_FUNCTION && token->isClass ) + { + kind = FLEXTAG_METHOD; + } + /* + * If a scope has been added to the token, change the token + * string to include the scope when making the tag. + */ + if ( vStringLength(token->scope) > 0 ) + { + fulltag = vStringNew (); + vStringCopy(fulltag, token->scope); + vStringCatS (fulltag, "."); + vStringCatS (fulltag, vStringValue(token->string)); + vStringTerminate(fulltag); + vStringCopy(token->string, fulltag); + vStringDelete (fulltag); + } + makeConstTag (token, kind); + } +} + +static void makeClassTag (tokenInfo *const token) +{ + vString * fulltag; + + if ( ! 
token->ignoreTag ) + { + fulltag = vStringNew (); + if (vStringLength (token->scope) > 0) + { + vStringCopy(fulltag, token->scope); + vStringCatS (fulltag, "."); + vStringCatS (fulltag, vStringValue(token->string)); + } + else + { + vStringCopy(fulltag, token->string); + } + vStringTerminate(fulltag); + if ( ! stringListHas(ClassNames, vStringValue (fulltag)) ) + { + stringListAdd (ClassNames, vStringNewCopy (fulltag)); + makeFlexTag (token, FLEXTAG_CLASS); + } + vStringDelete (fulltag); + } +} + +static void makeMXTag (tokenInfo *const token) +{ + vString * fulltag; + + if ( ! token->ignoreTag ) + { + fulltag = vStringNew (); + if (vStringLength (token->scope) > 0) + { + vStringCopy(fulltag, token->scope); + vStringCatS (fulltag, "."); + vStringCatS (fulltag, vStringValue(token->string)); + } + else + { + vStringCopy(fulltag, token->string); + } + vStringTerminate(fulltag); + makeFlexTag (token, FLEXTAG_MXTAG); + vStringDelete (fulltag); + } +} + +static void makeFunctionTag (tokenInfo *const token) +{ + vString * fulltag; + + if ( ! token->ignoreTag ) + { + fulltag = vStringNew (); + if (vStringLength (token->scope) > 0) + { + vStringCopy(fulltag, token->scope); + vStringCatS (fulltag, "."); + vStringCatS (fulltag, vStringValue(token->string)); + } + else + { + vStringCopy(fulltag, token->string); + } + vStringTerminate(fulltag); + if ( ! stringListHas(FunctionNames, vStringValue (fulltag)) ) + { + stringListAdd (FunctionNames, vStringNewCopy (fulltag)); + makeFlexTag (token, FLEXTAG_FUNCTION); + } + vStringDelete (fulltag); + } +} + +/* + * Parsing functions + */ + +static void parseString (vString *const string, const int delimiter) +{ + boolean end = FALSE; + while (! end) + { + int c = fileGetc (); + if (c == EOF) + end = TRUE; + else if (c == '\\') + { + c = fileGetc(); /* This maybe a ' or ". 
*/ + vStringPut(string, c); + } + else if (c == delimiter) + end = TRUE; + else + vStringPut (string, c); + } + vStringTerminate (string); +} + +/* Read a C identifier beginning with "firstChar" and places it into + * "name". + */ +static void parseIdentifier (vString *const string, const int firstChar) +{ + int c = firstChar; + Assert (isIdentChar (c)); + do + { + vStringPut (string, c); + c = fileGetc (); + } while (isIdentChar (c)); + vStringTerminate (string); + if (!isspace (c)) + fileUngetc (c); /* unget non-identifier character */ +} + +static void readToken (tokenInfo *const token) +{ + int c; + + token->type = TOKEN_UNDEFINED; + token->keyword = KEYWORD_NONE; + vStringClear (token->string); + +getNextChar: + do + { + c = fileGetc (); + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + } + while (c == '\t' || c == ' ' || c == '\n'); + + switch (c) + { + case EOF: longjmp (Exception, (int)ExceptionEOF); break; + case '(': token->type = TOKEN_OPEN_PAREN; break; + case ')': token->type = TOKEN_CLOSE_PAREN; break; + case ';': token->type = TOKEN_SEMICOLON; break; + case ',': token->type = TOKEN_COMMA; break; + case '.': token->type = TOKEN_PERIOD; break; + case ':': token->type = TOKEN_COLON; break; + case '{': token->type = TOKEN_OPEN_CURLY; break; + case '}': token->type = TOKEN_CLOSE_CURLY; break; + case '=': token->type = TOKEN_EQUAL_SIGN; break; + case '[': token->type = TOKEN_OPEN_SQUARE; break; + case ']': token->type = TOKEN_CLOSE_SQUARE; break; + case '?': token->type = TOKEN_QUESTION_MARK; break; + + case '\'': + case '"': + token->type = TOKEN_STRING; + parseString (token->string, c); + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + break; + + case '\\': + c = fileGetc (); + if (c != '\\' && c != '"' && !isspace (c)) + fileUngetc (c); + token->type = TOKEN_CHARACTER; + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition 
(); + break; + + case '/': + { + int d = fileGetc (); + if ( (d != '*') && /* is this the start of a comment? */ + (d != '/') && /* is a one line comment? */ + (d != '>') ) /* is this a close XML tag? */ + { + fileUngetc (d); + token->type = TOKEN_FORWARD_SLASH; + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + } + else + { + if (d == '*') + { + do + { + fileSkipToCharacter ('*'); + c = fileGetc (); + if (c == '/') + break; + else + fileUngetc (c); + } while (c != EOF && c != '\0'); + goto getNextChar; + } + else if (d == '/') /* is this the start of a comment? */ + { + fileSkipToCharacter ('\n'); + goto getNextChar; + } + else if (d == '>') /* is this the start of a comment? */ + { + token->type = TOKEN_CLOSE_SGML; + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + } + } + break; + } + + case '<': + { + /* + * An XML comment looks like this + * + */ + int d = fileGetc (); + + if ( (d != '!' ) && /* is this the start of a comment? */ + (d != '/' ) && /* is this the start of a closing mx tag */ + (d != 'm' ) && /* is this the start of a mx tag */ + (d != 'f' ) && /* is this the start of a fx tag */ + (d != 's' ) ) /* is this the start of a spark tag */ + { + fileUngetc (d); + token->type = TOKEN_LESS_THAN; + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + break; + } + else + { + if (d == '!') + { + int e = fileGetc (); + if ( e != '-' ) /* is this the start of a comment? */ + { + fileUngetc (e); + fileUngetc (d); + token->type = TOKEN_LESS_THAN; + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + } + else + { + if (e == '-') + { + int f = fileGetc (); + if ( f != '-' ) /* is this the start of a comment? 
*/ + { + fileUngetc (f); + fileUngetc (e); + fileUngetc (d); + token->type = TOKEN_LESS_THAN; + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + } + else + { + if (f == '-') + { + do + { + fileSkipToCharacter ('-'); + c = fileGetc (); + if (c == '-') + { + d = fileGetc (); + if (d == '>') + break; + else + { + fileUngetc (d); + fileUngetc (c); + } + break; + } + else + fileUngetc (c); + } while (c != EOF && c != '\0'); + goto getNextChar; + } + } + } + } + } + else if (d == 'm' || d == 'f' || d == 's' ) + { + int e = fileGetc (); + if ( (d == 'm' || d == 'f') && e != 'x' ) /* continuing an mx or fx tag */ + { + fileUngetc (e); + fileUngetc (d); + token->type = TOKEN_LESS_THAN; + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + break; + } + else + { + if ( (d == 'm' || d == 'f') && e == 'x' ) + { + int f = fileGetc (); + if ( f != ':' ) /* start of the tag */ + { + fileUngetc (f); + fileUngetc (e); + fileUngetc (d); + token->type = TOKEN_LESS_THAN; + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + break; + } + else + { + token->type = TOKEN_OPEN_MXML; + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + break; + } + } + if ( d == 's' && e == ':') /* continuing a spark tag */ + { + token->type = TOKEN_OPEN_MXML; + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + break; + } + else + { + fileUngetc (e); + fileUngetc (d); + token->type = TOKEN_LESS_THAN; + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + break; + } + } + } + else if (d == '/') + { + int e = fileGetc (); + if ( !(e == 'm' || e == 'f' || e == 's' )) + { + fileUngetc (e); + fileUngetc (d); + token->type = TOKEN_LESS_THAN; + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + break; + } + else + 
{ + int f = fileGetc (); + if ( (e == 'm' || e == 'f') && f != 'x' ) /* continuing an mx or fx tag */ + { + fileUngetc (f); + fileUngetc (e); + token->type = TOKEN_LESS_THAN; + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + break; + } + else + { + if (f == 'x') + { + int g = fileGetc (); + if ( g != ':' ) /* is this the start of a comment? */ + { + fileUngetc (g); + fileUngetc (f); + fileUngetc (e); + token->type = TOKEN_LESS_THAN; + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + break; + } + else + { + token->type = TOKEN_CLOSE_MXML; + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + break; + } + } + if ( e == 's' && f == ':') /* continuing a spark tag */ + { + token->type = TOKEN_CLOSE_MXML; + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + break; + } + else + { + fileUngetc (f); + fileUngetc (e); + token->type = TOKEN_LESS_THAN; + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + break; + } + } + } + } + } + break; + } + + case '>': + token->type = TOKEN_GREATER_THAN; + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + break; + + case '!': + token->type = TOKEN_EXCLAMATION; + /*token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition ();*/ + break; + + default: + if (! 
isIdentChar (c)) + token->type = TOKEN_UNDEFINED; + else + { + parseIdentifier (token->string, c); + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + token->keyword = analyzeToken (token->string, Lang_js); + if (isKeyword (token, KEYWORD_NONE)) + token->type = TOKEN_IDENTIFIER; + else + token->type = TOKEN_KEYWORD; + } + break; + } +} + +static void copyToken (tokenInfo *const dest, tokenInfo *const src) +{ + dest->nestLevel = src->nestLevel; + dest->lineNumber = src->lineNumber; + dest->filePosition = src->filePosition; + dest->type = src->type; + dest->keyword = src->keyword; + dest->isClass = src->isClass; + vStringCopy(dest->string, src->string); + vStringCopy(dest->scope, src->scope); +} + +/* + * Token parsing functions + */ + +static void skipArgumentList (tokenInfo *const token) +{ + int nest_level = 0; + + /* + * Other databases can have arguments with fully declared + * datatypes: + * ( name varchar(30), text binary(10) ) + * So we must check for nested open and closing parantheses + */ + + if (isType (token, TOKEN_OPEN_PAREN)) /* arguments? */ + { + nest_level++; + while (! (isType (token, TOKEN_CLOSE_PAREN) && (nest_level == 0))) + { + readToken (token); + if (isType (token, TOKEN_OPEN_PAREN)) + { + nest_level++; + } + if (isType (token, TOKEN_CLOSE_PAREN)) + { + if (nest_level > 0) + { + nest_level--; + } + } + } + readToken (token); + } +} + +static void skipArrayList (tokenInfo *const token) +{ + int nest_level = 0; + + /* + * Handle square brackets + * var name[1] + * So we must check for nested open and closing square brackets + */ + + if (isType (token, TOKEN_OPEN_SQUARE)) /* arguments? */ + { + nest_level++; + while (! 
(isType (token, TOKEN_CLOSE_SQUARE) && (nest_level == 0))) + { + readToken (token); + if (isType (token, TOKEN_OPEN_SQUARE)) + { + nest_level++; + } + if (isType (token, TOKEN_CLOSE_SQUARE)) + { + if (nest_level > 0) + { + nest_level--; + } + } + } + readToken (token); + } +} + +static void addContext (tokenInfo* const parent, const tokenInfo* const child) +{ + if (vStringLength (parent->string) > 0) + { + vStringCatS (parent->string, "."); + } + vStringCatS (parent->string, vStringValue(child->string)); + vStringTerminate(parent->string); +} + +static void addToScope (tokenInfo* const token, vString* const extra) +{ + if (vStringLength (token->scope) > 0) + { + vStringCatS (token->scope, "."); + } + vStringCatS (token->scope, vStringValue(extra)); + vStringTerminate(token->scope); +} + +/* + * Scanning functions + */ + +static void findCmdTerm (tokenInfo *const token) +{ + /* + * Read until we find either a semicolon or closing brace. + * Any nested braces will be handled within. + */ + while (! ( isType (token, TOKEN_SEMICOLON) || + isType (token, TOKEN_CLOSE_CURLY) ) ) + { + /* Handle nested blocks */ + if ( isType (token, TOKEN_OPEN_CURLY)) + { + parseBlock (token, token); + } + else if ( isType (token, TOKEN_OPEN_PAREN) ) + { + skipArgumentList(token); + } + else + { + readToken (token); + } + } +} + +static void parseSwitch (tokenInfo *const token) +{ + /* + * switch (expression){ + * case value1: + * statement; + * break; + * case value2: + * statement; + * break; + * default : statement; + * } + */ + + readToken (token); + + if (isType (token, TOKEN_OPEN_PAREN)) + { + skipArgumentList(token); + } + + if (isType (token, TOKEN_OPEN_CURLY)) + { + do + { + readToken (token); + } while (! 
(isType (token, TOKEN_CLOSE_SGML) || + isType (token, TOKEN_CLOSE_MXML) || + isType (token, TOKEN_CLOSE_CURLY) || + isType (token, TOKEN_GREATER_THAN)) ); + } + +} + +static void parseLoop (tokenInfo *const token) +{ + /* + * Handles these statements + * for (x=0; x<3; x++) + * document.write("This text is repeated three times
<br>"); + * + * for (x=0; x<3; x++) + * { + * document.write("This text is repeated three times
<br>"); + * } + * + * while (number<5){ + * document.write(number+"
<br>"); + * number++; + * } + * + * do{ + * document.write(number+"
"); + * number++; + * } + * while (number<5); + */ + + if (isKeyword (token, KEYWORD_for) || isKeyword (token, KEYWORD_while)) + { + readToken(token); + + if (isType (token, TOKEN_OPEN_PAREN)) + { + /* + * Handle nameless functions, these will only + * be considered methods. + */ + skipArgumentList(token); + } + + if (isType (token, TOKEN_OPEN_CURLY)) + { + /* + * This will be either a function or a class. + * We can only determine this by checking the body + * of the function. If we find a "this." we know + * it is a class, otherwise it is a function. + */ + parseBlock (token, token); + } + else + { + parseLine(token); + } + } + else if (isKeyword (token, KEYWORD_do)) + { + readToken(token); + + if (isType (token, TOKEN_OPEN_CURLY)) + { + /* + * This will be either a function or a class. + * We can only determine this by checking the body + * of the function. If we find a "this." we know + * it is a class, otherwise it is a function. + */ + parseBlock (token, token); + } + else + { + parseLine(token); + } + + readToken(token); + + if (isKeyword (token, KEYWORD_while)) + { + readToken(token); + + if (isType (token, TOKEN_OPEN_PAREN)) + { + /* + * Handle nameless functions, these will only + * be considered methods. + */ + skipArgumentList(token); + } + } + } +} + +static boolean parseIf (tokenInfo *const token) +{ + boolean read_next_token = TRUE; + /* + * If statements have two forms + * if ( ... ) + * one line; + * + * if ( ... ) + * statement; + * else + * statement + * + * if ( ... ) { + * multiple; + * statements; + * } + * + * + * if ( ... ) { + * return elem + * } + * + * This example if correctly written, but the + * else contains only 1 statement without a terminator + * since the function finishes with the closing brace. + * + * function a(flag){ + * if(flag) + * test(1); + * else + * test(2) + * } + * + * TODO: Deal with statements that can optional end + * without a semi-colon. Currently this messes up + * the parsing of blocks. 
+ * Need to somehow detect this has happened, and either + * backup a token, or skip reading the next token if + * that is possible from all code locations. + * + */ + + readToken (token); + + if (isKeyword (token, KEYWORD_if)) + { + /* + * Check for an "else if" and consume the "if" + */ + readToken (token); + } + + if (isType (token, TOKEN_OPEN_PAREN)) + { + /* + * Handle nameless functions, these will only + * be considered methods. + */ + skipArgumentList(token); + } + + if (isType (token, TOKEN_OPEN_CURLY)) + { + /* + * This will be either a function or a class. + * We can only determine this by checking the body + * of the function. If we find a "this." we know + * it is a class, otherwise it is a function. + */ + parseBlock (token, token); + } + else + { + findCmdTerm (token); + + /* + * The IF could be followed by an ELSE statement. + * This too could have two formats, a curly braced + * multiline section, or another single line. + */ + + if (isType (token, TOKEN_CLOSE_CURLY)) + { + /* + * This statement did not have a line terminator. + */ + read_next_token = FALSE; + } + else + { + readToken (token); + + if (isType (token, TOKEN_CLOSE_CURLY)) + { + /* + * This statement did not have a line terminator. 
+ */ + read_next_token = FALSE; + } + else + { + if (isKeyword (token, KEYWORD_else)) + read_next_token = parseIf (token); + } + } + } + return read_next_token; +} + +static void parseFunction (tokenInfo *const token) +{ + tokenInfo *const name = newToken (); + + /* + * This deals with these formats + * private static function ioErrorHandler( event:IOErrorEvent ):void { + */ + + if ( isKeyword(token, KEYWORD_function) ) + { + readToken (token); + } + + copyToken (name, token); + /* Add scope in case this is an INNER function + addToScope(name, token->scope); + */ + + DebugStatement ( + debugPrintf (DEBUG_PARSE + , "\n parseFunction: token isClass:%d scope:%s name:%s\n" + , token->isClass + , vStringValue(token->scope) + , vStringValue(token->string) + ); + ); + DebugStatement ( + debugPrintf (DEBUG_PARSE + , "\n parseFunction: name isClass:%d scope:%s name:%s\n" + , name->isClass + , vStringValue(name->scope) + , vStringValue(name->string) + ); + ); + + readToken (token); + + if ( isType (token, TOKEN_OPEN_PAREN) ) + skipArgumentList(token); + + if ( isType (token, TOKEN_COLON) ) + { + /* + * function fname ():ReturnType + */ + readToken (token); + readToken (token); + } + + if ( isType (token, TOKEN_OPEN_CURLY) ) + { + DebugStatement ( + debugPrintf (DEBUG_PARSE + , "\n parseFunction end: name isClass:%d scope:%s name:%s\n" + , name->isClass + , vStringValue(name->scope) + , vStringValue(name->string) + ); + ); + parseBlock (token, name); + DebugStatement ( + debugPrintf (DEBUG_PARSE + , "\n parseFunction end2: token isClass:%d scope:%s name:%s\n" + , token->isClass + , vStringValue(token->scope) + , vStringValue(token->string) + ); + ); + DebugStatement ( + debugPrintf (DEBUG_PARSE + , "\n parseFunction end2: token isClass:%d scope:%s name:%s\n" + , token->isClass + , vStringValue(token->scope) + , vStringValue(token->string) + ); + ); + DebugStatement ( + debugPrintf (DEBUG_PARSE + , "\n parseFunction end3: name isClass:%d scope:%s name:%s\n" + , name->isClass + 
, vStringValue(name->scope) + , vStringValue(name->string) + ); + ); + makeFunctionTag (name); + } + + findCmdTerm (token); + + deleteToken (name); +} + +static boolean parseBlock (tokenInfo *const token, tokenInfo *const parent) +{ + boolean read_next_token = TRUE; + vString * saveScope = vStringNew (); + + vStringClear(saveScope); + vStringCopy (saveScope, token->scope); + token->nestLevel++; + DebugStatement ( + debugPrintf (DEBUG_PARSE + , "\n parseBlock start: token isClass:%d scope:%s name:%s\n" + , token->isClass + , vStringValue(token->scope) + , vStringValue(token->string) + ); + ); + /* + * Make this routine a bit more forgiving. + * If called on an open_curly advance it + */ + if ( isType (token, TOKEN_OPEN_CURLY) && + isKeyword(token, KEYWORD_NONE) ) + readToken(token); + + if (! isType (token, TOKEN_CLOSE_CURLY)) + { + /* + * Read until we find the closing brace, + * any nested braces will be handled within + */ + do + { + if (isType (token, TOKEN_OPEN_CURLY)) + { + /* Handle nested blocks */ + parseBlock (token, parent); + } + else + { + /* + * It is possible for a line to have no terminator + * if the following line is a closing brace. + * parseLine will detect this case and indicate + * whether we should read an additional token. + */ + read_next_token = parseLine (token); + } + + /* + * Always read a new token unless we find a statement without + * a ending terminator + */ + if( read_next_token ) + readToken(token); + + /* + * If we find a statement without a terminator consider the + * block finished, otherwise the stack will be off by one. + */ + } while (! 
isType (token, TOKEN_CLOSE_CURLY) && read_next_token ); + } + + vStringDelete(saveScope); + token->nestLevel--; + + DebugStatement ( + debugPrintf (DEBUG_PARSE + , "\n parseBlock end: token isClass:%d scope:%s name:%s\n" + , token->isClass + , vStringValue(token->scope) + , vStringValue(token->string) + ); + ); + return FALSE; +} + +static void parseMethods (tokenInfo *const token, tokenInfo *const class) +{ + tokenInfo *const name = newToken (); + + /* + * This deals with these formats + * validProperty : 2, + * validMethod : function(a,b) {} + * 'validMethod2' : function(a,b) {} + * container.dirtyTab = {'url': false, 'title':false, 'snapshot':false, '*': false} + */ + + do + { + readToken (token); + if (isType (token, TOKEN_STRING) || isKeyword(token, KEYWORD_NONE)) + { + copyToken (name, token); + + readToken (token); + if ( isType (token, TOKEN_COLON) ) + { + readToken (token); + if ( isKeyword (token, KEYWORD_function) ) + { + readToken (token); + if ( isType (token, TOKEN_OPEN_PAREN) ) + { + skipArgumentList(token); + } + + if (isType (token, TOKEN_OPEN_CURLY)) + { + addToScope (name, class->string); + makeFlexTag (name, FLEXTAG_METHOD); + parseBlock (token, name); + + /* + * Read to the closing curly, check next + * token, if a comma, we must loop again + */ + readToken (token); + } + } + else + { + addToScope (name, class->string); + makeFlexTag (name, FLEXTAG_PROPERTY); + + /* + * Read the next token, if a comma + * we must loop again + */ + readToken (token); + } + } + } + } while ( isType(token, TOKEN_COMMA) ); + + findCmdTerm (token); + + deleteToken (name); +} + +static boolean parseVar (tokenInfo *const token, boolean is_public) +{ + tokenInfo *const name = newToken (); + tokenInfo *const secondary_name = newToken (); + vString * saveScope = vStringNew (); + boolean is_terminated = TRUE; + + vStringClear(saveScope); + vStringCopy (saveScope, token->scope); + /* + * Variables are defined as: + * private static var lastFaultMessage:Date = new Date( 0 
); + * private static var webRequests:ArrayCollection = new ArrayCollection(); + */ + + if ( isKeyword(token, KEYWORD_var) ) + { + readToken(token); + } + + /* Variable name */ + copyToken (name, token); + readToken(token); + + if ( isType (token, TOKEN_COLON) ) + { + /* + * var vname ():DataType = new Date(); + * var vname ():DataType; + */ + readToken (token); + readToken (token); + } + + while (! isType (token, TOKEN_SEMICOLON) ) + { + readToken (token); + } + + if ( isType (token, TOKEN_SEMICOLON) ) + { + /* + * Only create variables for global scope + */ + /* if ( token->nestLevel == 0 && is_global ) */ + if ( is_public ) + { + if (isType (token, TOKEN_SEMICOLON)) + makeFlexTag (name, FLEXTAG_VARIABLE); + } + } + + vStringCopy(token->scope, saveScope); + deleteToken (name); + deleteToken (secondary_name); + vStringDelete(saveScope); + + return is_terminated; +} + +static boolean parseClass (tokenInfo *const token) +{ + tokenInfo *const name = newToken (); + vString * saveScope = vStringNew (); + boolean saveIsClass = token->isClass; + + vStringClear(saveScope); + vStringCopy (saveScope, token->scope); + /* + * Variables are defined as: + * private static var lastFaultMessage:Date = new Date( 0 ); + * private static var webRequests:ArrayCollection = new ArrayCollection(); + */ + + if ( isKeyword(token, KEYWORD_class) ) + { + readToken(token); + } + + token->isClass = TRUE; + /* Add class name to scope */ + addToScope(token, token->string); + /* Class name */ + copyToken (name, token); + readToken(token); + + DebugStatement ( + debugPrintf (DEBUG_PARSE + , "\n parseClass start: token isClass:%d scope:%s name:%s\n" + , token->isClass + , vStringValue(token->scope) + , vStringValue(token->string) + ); + ); + if ( isType (token, TOKEN_OPEN_CURLY) ) + { + makeClassTag (name); + parseBlock (token, name); + } + + DebugStatement ( + debugPrintf (DEBUG_PARSE + , "\n parseClass end: token isClass:%d scope:%s name:%s\n" + , token->isClass + , vStringValue(token->scope) + 
, vStringValue(token->string) + ); + ); + vStringCopy(token->scope, saveScope); + token->isClass = saveIsClass; + deleteToken (name); + vStringDelete(saveScope); + + return TRUE; +} + +static boolean parseStatement (tokenInfo *const token) +{ + tokenInfo *const name = newToken (); + tokenInfo *const secondary_name = newToken (); + vString * saveScope = vStringNew (); + boolean is_public = FALSE; + boolean is_class = FALSE; + boolean is_terminated = TRUE; + boolean is_global = FALSE; + boolean is_prototype = FALSE; + vString * fulltag; + + vStringClear(saveScope); + vStringCopy (saveScope, token->scope); + DebugStatement ( + debugPrintf (DEBUG_PARSE + , "\n parseStatement: token isClass:%d scope:%s name:%s\n" + , token->isClass + , vStringValue(token->scope) + , vStringValue(token->string) + ); + ); + /* + * Functions can be named or unnamed. + * This deals with these formats: + * Function + * validFunctionOne = function(a,b) {} + * testlib.validFunctionFive = function(a,b) {} + * var innerThree = function(a,b) {} + * var innerFour = (a,b) {} + * var D2 = secondary_fcn_name(a,b) {} + * var D3 = new Function("a", "b", "return a+b;"); + * Class + * testlib.extras.ValidClassOne = function(a,b) { + * this.a = a; + * } + * Class Methods + * testlib.extras.ValidClassOne.prototype = { + * 'validMethodOne' : function(a,b) {}, + * 'validMethodTwo' : function(a,b) {} + * } + * ValidClassTwo = function () + * { + * this.validMethodThree = function() {} + * // unnamed method + * this.validMethodFour = () {} + * } + * Database.prototype.validMethodThree = Database_getTodaysDate; + */ + + if ( isKeyword(token, KEYWORD_public) ) + { + is_public = TRUE; + readToken(token); + } + + if ( isKeyword(token, KEYWORD_private) ) + { + readToken(token); + } + + if ( isKeyword(token, KEYWORD_static) ) + { + readToken(token); + } + + if (isType(token, TOKEN_KEYWORD)) + { + switch (token->keyword) + { + case KEYWORD_for: + case KEYWORD_while: + case KEYWORD_do: + parseLoop (token); + break; + 
case KEYWORD_if: + case KEYWORD_else: + case KEYWORD_try: + case KEYWORD_catch: + case KEYWORD_finally: + /* Common semantics */ + is_terminated = parseIf (token); + break; + case KEYWORD_switch: + parseSwitch (token); + break; + case KEYWORD_class: + parseClass (token); + return is_terminated; + break; + case KEYWORD_function: + parseFunction (token); + return is_terminated; + break; + case KEYWORD_var: + parseVar (token, is_public); + return is_terminated; + break; + default: + readToken(token); + break; + } + } + + copyToken (name, token); + + while (! isType (token, TOKEN_CLOSE_CURLY) && + ! isType (token, TOKEN_SEMICOLON) && + ! isType (token, TOKEN_EQUAL_SIGN) ) + { + /* Potentially the name of the function */ + readToken (token); + if (isType (token, TOKEN_PERIOD)) + { + /* + * Cannot be a global variable is it has dot references in the name + */ + is_global = FALSE; + do + { + readToken (token); + if ( isKeyword(token, KEYWORD_NONE) ) + { + if ( is_class ) + { + vStringCopy(saveScope, token->scope); + addToScope(token, name->string); + } + else + addContext (name, token); + } + else if ( isKeyword(token, KEYWORD_prototype) ) + { + /* + * When we reach the "prototype" tag, we infer: + * "BindAgent" is a class + * "build" is a method + * + * function BindAgent( repeatableIdName, newParentIdName ) { + * } + * + * CASE 1 + * Specified function name: "build" + * BindAgent.prototype.build = function( mode ) { + * ignore everything within this function + * } + * + * CASE 2 + * Prototype listing + * ValidClassOne.prototype = { + * 'validMethodOne' : function(a,b) {}, + * 'validMethodTwo' : function(a,b) {} + * } + * + */ + makeClassTag (name); + is_class = TRUE; + is_prototype = TRUE; + + /* + * There should a ".function_name" next. 
+ */ + readToken (token); + if (isType (token, TOKEN_PERIOD)) + { + /* + * Handle CASE 1 + */ + readToken (token); + if ( isKeyword(token, KEYWORD_NONE) ) + { + vStringCopy(saveScope, token->scope); + addToScope(token, name->string); + + makeFlexTag (token, FLEXTAG_METHOD); + /* + * We can read until the end of the block / statement. + * We need to correctly parse any nested blocks, but + * we do NOT want to create any tags based on what is + * within the blocks. + */ + token->ignoreTag = TRUE; + /* + * Find to the end of the statement + */ + findCmdTerm (token); + token->ignoreTag = FALSE; + is_terminated = TRUE; + goto cleanUp; + } + } + else if (isType (token, TOKEN_EQUAL_SIGN)) + { + readToken (token); + if (isType (token, TOKEN_OPEN_CURLY)) + { + /* + * Handle CASE 2 + * + * Creates tags for each of these class methods + * ValidClassOne.prototype = { + * 'validMethodOne' : function(a,b) {}, + * 'validMethodTwo' : function(a,b) {} + * } + */ + parseMethods(token, name); + /* + * Find to the end of the statement + */ + findCmdTerm (token); + token->ignoreTag = FALSE; + is_terminated = TRUE; + goto cleanUp; + } + } + } + readToken (token); + } while (isType (token, TOKEN_PERIOD)); + } + + if ( isType (token, TOKEN_OPEN_PAREN) ) + skipArgumentList(token); + + if ( isType (token, TOKEN_COLON) ) + { + /* + * Functions are of this form: + * function fname ():ReturnType { + */ + readToken (token); + readToken (token); + } + + if ( isType (token, TOKEN_OPEN_SQUARE) ) + skipArrayList(token); + + } + + if ( isType (token, TOKEN_CLOSE_CURLY) ) + { + /* + * Reaching this section without having + * processed an open curly brace indicates + * the statement is most likely not terminated. 
+ */ + is_terminated = FALSE; + goto cleanUp; + } + + if ( isType (token, TOKEN_SEMICOLON) ) + { + /* + * Only create variables for global scope + */ + if ( token->nestLevel == 0 && is_global ) + { + /* + * Handles this syntax: + * var g_var2; + */ + if (isType (token, TOKEN_SEMICOLON)) + makeFlexTag (name, FLEXTAG_VARIABLE); + } + /* + * Statement has ended. + * This deals with calls to functions, like: + * alert(..); + */ + goto cleanUp; + } + + if ( isType (token, TOKEN_EQUAL_SIGN) ) + { + readToken (token); + + if ( isKeyword (token, KEYWORD_function) ) + { + readToken (token); + + if ( isKeyword (token, KEYWORD_NONE) && + ! isType (token, TOKEN_OPEN_PAREN) ) + { + /* + * Functions of this format: + * var D2A = function theAdd(a, b) + * { + * return a+b; + * } + * Are really two separate defined functions and + * can be referenced in two ways: + * alert( D2A(1,2) ); // produces 3 + * alert( theAdd(1,2) ); // also produces 3 + * So it must have two tags: + * D2A + * theAdd + * Save the reference to the name for later use, once + * we have established this is a valid function we will + * create the secondary reference to it. + */ + copyToken (secondary_name, token); + readToken (token); + } + + if ( isType (token, TOKEN_OPEN_PAREN) ) + skipArgumentList(token); + + if (isType (token, TOKEN_OPEN_CURLY)) + { + /* + * This will be either a function or a class. + * We can only determine this by checking the body + * of the function. If we find a "this." we know + * it is a class, otherwise it is a function. 
+ */ + if ( token->isClass ) + { + makeFlexTag (name, FLEXTAG_METHOD); + if ( vStringLength(secondary_name->string) > 0 ) + makeFunctionTag (secondary_name); + parseBlock (token, name); + } + else + { + parseBlock (token, name); + makeFunctionTag (name); + + if ( vStringLength(secondary_name->string) > 0 ) + makeFunctionTag (secondary_name); + + /* + * Find to the end of the statement + */ + goto cleanUp; + } + } + } + else if (isType (token, TOKEN_OPEN_PAREN)) + { + /* + * Handle nameless functions + * this.method_name = () {} + */ + skipArgumentList(token); + + if (isType (token, TOKEN_OPEN_CURLY)) + { + /* + * Nameless functions are only setup as methods. + */ + makeFlexTag (name, FLEXTAG_METHOD); + parseBlock (token, name); + } + } + else if (isType (token, TOKEN_OPEN_CURLY)) + { + /* + * Creates tags for each of these class methods + * ValidClassOne.prototype = { + * 'validMethodOne' : function(a,b) {}, + * 'validMethodTwo' : function(a,b) {} + * } + */ + parseMethods(token, name); + if (isType (token, TOKEN_CLOSE_CURLY)) + { + /* + * Assume the closing parantheses terminates + * this statements. + */ + is_terminated = TRUE; + } + } + else if (isKeyword (token, KEYWORD_new)) + { + readToken (token); + if ( isKeyword (token, KEYWORD_function) || + isKeyword (token, KEYWORD_capital_function) || + isKeyword (token, KEYWORD_object) || + isKeyword (token, KEYWORD_capital_object) ) + { + if ( isKeyword (token, KEYWORD_object) || + isKeyword (token, KEYWORD_capital_object) ) + is_class = TRUE; + + readToken (token); + if ( isType (token, TOKEN_OPEN_PAREN) ) + skipArgumentList(token); + + if (isType (token, TOKEN_SEMICOLON)) + { + if ( token->nestLevel == 0 ) + { + if ( is_class ) + { + makeClassTag (name); + } else { + makeFunctionTag (name); + } + } + } + } + } + else if (isKeyword (token, KEYWORD_NONE)) + { + /* + * Only create variables for global scope + */ + if ( token->nestLevel == 0 && is_global ) + { + /* + * A pointer can be created to the function. 
+ * If we recognize the function/class name ignore the variable. + * This format looks identical to a variable definition. + * A variable defined outside of a block is considered + * a global variable: + * var g_var1 = 1; + * var g_var2; + * This is not a global variable: + * var g_var = function; + * This is a global variable: + * var g_var = different_var_name; + */ + fulltag = vStringNew (); + if (vStringLength (token->scope) > 0) + { + vStringCopy(fulltag, token->scope); + vStringCatS (fulltag, "."); + vStringCatS (fulltag, vStringValue(token->string)); + } + else + { + vStringCopy(fulltag, token->string); + } + vStringTerminate(fulltag); + if ( ! stringListHas(FunctionNames, vStringValue (fulltag)) && + ! stringListHas(ClassNames, vStringValue (fulltag)) ) + { + findCmdTerm (token); + if (isType (token, TOKEN_SEMICOLON)) + makeFlexTag (name, FLEXTAG_VARIABLE); + } + vStringDelete (fulltag); + } + } + } + findCmdTerm (token); + + /* + * Statements can be optionally terminated in the case of + * statement prior to a close curly brace as in the + * document.write line below: + * + * function checkForUpdate() { + * if( 1==1 ) { + * document.write("hello from checkForUpdate
") + * } + * return 1; + * } + */ + if ( ! is_terminated && isType (token, TOKEN_CLOSE_CURLY)) + is_terminated = FALSE; + + +cleanUp: + vStringCopy(token->scope, saveScope); + deleteToken (name); + deleteToken (secondary_name); + vStringDelete(saveScope); + + return is_terminated; +} + +static boolean parseLine (tokenInfo *const token) +{ + boolean is_terminated = TRUE; + /* + * Detect the common statements, if, while, for, do, ... + * This is necessary since the last statement within a block "{}" + * can be optionally terminated. + * + * If the statement is not terminated, we need to tell + * the calling routine to prevent reading an additional token + * looking for the end of the statement. + */ + + if (isType(token, TOKEN_KEYWORD)) + { + switch (token->keyword) + { + case KEYWORD_for: + case KEYWORD_while: + case KEYWORD_do: + parseLoop (token); + break; + case KEYWORD_if: + case KEYWORD_else: + case KEYWORD_try: + case KEYWORD_catch: + case KEYWORD_finally: + /* Common semantics */ + is_terminated = parseIf (token); + break; + case KEYWORD_switch: + parseSwitch (token); + break; + default: + parseStatement (token); + break; + } + } + else + { + /* + * Special case where single line statements may not be + * SEMICOLON terminated. parseBlock needs to know this + * so that it does not read the next token. + */ + is_terminated = parseStatement (token); + } + return is_terminated; +} + +static boolean parseCDATA (tokenInfo *const token) +{ + if (isType (token, TOKEN_LESS_THAN)) + { + /* + * Handle these tags + * + */ + readToken (token); + if (isType (token, TOKEN_EXCLAMATION)) + { + /* + * Not sure why I had to comment these out, but I did. 
+ * readToken (token); + * if (isType (token, TOKEN_OPEN_SQUARE)) + * { + */ + readToken (token); + if (isKeyword (token, KEYWORD_cdata)) + { + readToken (token); + if (isType (token, TOKEN_OPEN_SQUARE)) + { + parseActionScript (token); + if (isType (token, TOKEN_CLOSE_SQUARE)) + { + readToken (token); + if (isType (token, TOKEN_CLOSE_SQUARE)) + { + readToken (token); + } + } + } + } + /*} Not sure */ + } + } + else + { + parseActionScript (token); + } + return TRUE; +} + +static boolean parseNamespace (tokenInfo *const token) +{ + /* + * If we have found a <, we know it is not a TOKEN_OPEN_MXML + * but it could potentially be a different namespace. + * This means it will also have a closing tag, which will + * mess up the parser if we do not properly recurse + * through these tags. + */ + + if (isType (token, TOKEN_LESS_THAN)) + { + readToken (token); + } + + /* + * Check if we have reached a other namespace tag + * + * or + * + * + */ + if (isType (token, TOKEN_IDENTIFIER)) + { + readToken (token); + if (isType (token, TOKEN_COLON)) + { + readToken (token); + if ( ! isType (token, TOKEN_IDENTIFIER)) + { + return TRUE; + } + } + else + { + return TRUE; + } + } + else + { + return TRUE; + } + + /* + * Confirmed we are inside a namespace tag, so + * process it until the close tag. + * + * But also check for new tags, which will either + * be recursive namespaces or MXML tags + */ + do + { + if (isType (token, TOKEN_LESS_THAN)) + { + parseNamespace (token); + readToken (token); + } + if (isType (token, TOKEN_OPEN_MXML)) + { + parseMXML (token); + } + else + { + readToken (token); + } + } while (! (isType (token, TOKEN_CLOSE_SGML) || isType (token, TOKEN_CLOSE_MXML)) ); + + return TRUE; +} + +static boolean parseMXML (tokenInfo *const token) +{ + tokenInfo *const name = newToken (); + tokenInfo *const type = newToken (); + boolean inside_attributes = TRUE; + /* + * Detect the common statements, if, while, for, do, ... 
+ * This is necessary since the last statement within a block "{}" + * can be optionally terminated. + * + * If the statement is not terminated, we need to tell + * the calling routine to prevent reading an additional token + * looking for the end of the statement. + */ + + readToken (token); + + if (isKeyword (token, KEYWORD_script)) + { + /* + * These tags can be of this form: + * + */ + do + { + readToken (token); + } while (! (isType (token, TOKEN_CLOSE_SGML) || + isType (token, TOKEN_CLOSE_MXML) || + isType (token, TOKEN_GREATER_THAN)) ); + + if (isType (token, TOKEN_CLOSE_MXML)) + { + /* + * We have found a tag + * Finish reading the "type" and ">" + */ + readToken (token); + readToken (token); + goto cleanUp; + } + if (isType (token, TOKEN_CLOSE_SGML)) + { + /* + * We have found a + */ + goto cleanUp; + } + + /* + * This is a beginning of an embedded script. + * These typically are of this format: + * + * + * + */ + readToken (token); + parseCDATA (token); + + readToken (token); + if (isType (token, TOKEN_CLOSE_MXML)) + { + /* + * We have found a tag + * Finish reading the "type" and ">" + */ + readToken (token); + readToken (token); + } + goto cleanUp; + } + + copyToken (type, token); + + readToken (token); + do + { + if (isType (token, TOKEN_GREATER_THAN)) + { + inside_attributes = FALSE; + } + if (isType (token, TOKEN_LESS_THAN)) + { + parseNamespace (token); + readToken (token); + } + else if (isType (token, TOKEN_OPEN_MXML)) + { + parseMXML (token); + readToken (token); + } + else if (inside_attributes && (isKeyword (token, KEYWORD_id) || isKeyword (token, KEYWORD_name))) + { + if (vStringLength(name->string) == 0 ) + { + /* + * If we have already created the tag based on either "name" + * or "id" do not do it again. + */ + readToken (token); + readToken (token); + + copyToken (name, token); + addToScope (name, type->string); + makeMXTag (name); + } + else + { + readToken (token); + } + } + else + { + readToken (token); + } + } while (! 
(isType (token, TOKEN_CLOSE_SGML) || isType (token, TOKEN_CLOSE_MXML)) ); + + if (isType (token, TOKEN_CLOSE_MXML)) + { + /* + * We have found a tag + * Finish reading the "type" and ">" + */ + readToken (token); + readToken (token); + } + +cleanUp: + deleteToken (name); + deleteToken (type); + return TRUE; +} +/* Token loop for script content: each token read is dispatched to parseMXML, parseFunction or parseLine. The loop has no exit condition of its own; TRUE is returned only from the close-square and TOKEN_CLOSE_MXML branches below. */ +static boolean parseActionScript (tokenInfo *const token) +{ + do + { + readToken (token); + + if (isType (token, TOKEN_LESS_THAN)) + { + /* + * Handle these tags + * + * NOTE(review): no token is read between the two checks below, so + * the TOKEN_OPEN_SQUARE test looks unreachable once the + * TOKEN_EQUAL_SIGN test has passed (assuming isType compares + * token->type for equality) -- confirm the intended token type. + */ + readToken (token); + if (isType (token, TOKEN_EQUAL_SIGN)) + { + if (isType (token, TOKEN_OPEN_SQUARE)) + { + readToken (token); + if (isKeyword (token, KEYWORD_cdata)) + { + readToken (token); + } + } + } + } + if (isType (token, TOKEN_CLOSE_SQUARE)) + { + /* + * Handle these tags + * (two TOKEN_CLOSE_SQUARE followed by TOKEN_GREATER_THAN) + */ + readToken (token); + if (isType (token, TOKEN_CLOSE_SQUARE)) + { + readToken (token); + if (isType (token, TOKEN_GREATER_THAN)) + { + return TRUE; + } + } + } + else if (isType (token, TOKEN_CLOSE_MXML)) + { + /* + * Read the Script> tags + */ + readToken (token); + readToken (token); + return TRUE; + } + else if (isType (token, TOKEN_OPEN_MXML)) + { + parseMXML (token); + } + else + { + if (isType(token, TOKEN_KEYWORD)) + { + if (isKeyword (token, KEYWORD_private) || + isKeyword (token, KEYWORD_public) || + isKeyword (token, KEYWORD_override) ) + { + /* + * Methods can be defined as: + * private function f_name + * public override function f_name + * override private function f_name + * Ignore these keywords if present. + */ + readToken (token); + } + if (isKeyword (token, KEYWORD_private) || + isKeyword (token, KEYWORD_public) || + isKeyword (token, KEYWORD_override) ) + { + /* + * Methods can be defined as: + * private function f_name + * public override function f_name + * override private function f_name + * Ignore these keywords if present. 
+ */ + readToken (token); + } + + switch (token->keyword) + { + case KEYWORD_function: parseFunction (token); break; + default: parseLine (token); break; + } + } + else + { + parseLine (token); + } + } + } while (TRUE); +} +/* Top-level token loop for a Flex source file: TOKEN_OPEN_MXML goes to parseMXML; after a TOKEN_LESS_THAN, tokens are skipped up to the next TOKEN_QUESTION_MARK or TOKEN_GREATER_THAN; anything else is handed to parseActionScript. The loop has no exit condition of its own. */ +static void parseFlexFile (tokenInfo *const token) +{ + do + { + readToken (token); + + if (isType (token, TOKEN_OPEN_MXML)) + { + parseMXML (token); + } + else if (isType (token, TOKEN_LESS_THAN)) + { + readToken (token); + if (isType (token, TOKEN_QUESTION_MARK)) + { + /* + * + */ + readToken (token); + while (! isType (token, TOKEN_QUESTION_MARK) ) + { + readToken (token); + } + readToken (token); + } + else if (isKeyword (token, KEYWORD_NONE)) + { + /* + * This is a simple XML tag, read until the closing statement + * + * + */ + readToken (token); + while (! isType (token, TOKEN_GREATER_THAN) ) + { + readToken (token); + } + } + } + else + { + parseActionScript (token); + } + } while (TRUE); +} +/* Parser initialization hook: checks that FlexKinds has FLEXTAG_COUNT entries, records the language id in Lang_js and builds the keyword hash. */ +static void initialize (const langType language) +{ + Assert (sizeof (FlexKinds) / sizeof (FlexKinds [0]) == FLEXTAG_COUNT); + Lang_js = language; + buildFlexKeywordHash (); +} +/* Parser entry point: allocates the ClassNames/FunctionNames lists, calls parseFlexFile for as long as the setjmp on Exception yields ExceptionNone, then releases the lists and the token. */ +static void findFlexTags (void) +{ + tokenInfo *const token = newToken (); + exception_t exception; + + ClassNames = stringListNew (); + FunctionNames = stringListNew (); + + exception = (exception_t) (setjmp (Exception)); + while (exception == ExceptionNone) + parseFlexFile (token); + + stringListDelete (ClassNames); + stringListDelete (FunctionNames); + ClassNames = NULL; + FunctionNames = NULL; + deleteToken (token); +} + +/* Create parser definition structure */ +extern parserDefinition* FlexParser (void) +{ + static const char *const extensions [] = { "as", "mxml", NULL }; + parserDefinition *const def = parserNew ("Flex"); + def->extensions = extensions; + /* + * New definitions for parsing instead of regex + */ + def->kinds = FlexKinds; + def->kindCount = KIND_COUNT (FlexKinds); + def->parser = findFlexTags; + def->initialize = initialize; + + return def; +} +/* 
vi:set tabstop=4 shiftwidth=4 noexpandtab: */ diff --git a/third_party/ctags/fortran.c b/third_party/ctags/fortran.c new file mode 100644 index 000000000..652df20bd --- /dev/null +++ b/third_party/ctags/fortran.c @@ -0,0 +1,2204 @@ +// clang-format off +/* +* $Id: fortran.c 660 2008-04-20 23:30:12Z elliotth $ +* +* Copyright (c) 1998-2003, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains functions for generating tags for Fortran language +* files. +*/ + +/* +* INCLUDE FILES +*/ +#include "third_party/ctags/general.h" /* must always come first */ + +#include "libc/mem/alg.h" +#include "libc/str/str.h" +#include "libc/limits.h" +#include "libc/sysv/consts/_posix.h" +#include "libc/sysv/consts/iov.h" +#include "libc/sysv/consts/limits.h" +#include "libc/sysv/consts/xopen.h" +#include "libc/thread/thread.h" +#include "libc/str/str.h" /* to define tolower () */ +#include "libc/runtime/runtime.h" + +#include "third_party/ctags/debug.h" +#include "third_party/ctags/entry.h" +#include "third_party/ctags/keyword.h" +#include "third_party/ctags/options.h" +#include "third_party/ctags/parse.h" +#include "third_party/ctags/read.h" +#include "third_party/ctags/routines.h" +#include "third_party/ctags/vstring.h" + +/* +* MACROS +*/ +#define isident(c) (isalnum(c) || (c) == '_') +#define isBlank(c) (boolean) (c == ' ' || c == '\t') +#define isType(token,t) (boolean) ((token)->type == (t)) +#define isKeyword(token,k) (boolean) ((token)->keyword == (k)) +#define isSecondaryKeyword(token,k) (boolean) ((token)->secondary == NULL ? \ + FALSE : (token)->secondary->keyword == (k)) + +/* +* DATA DECLARATIONS +*/ + +typedef enum eException { + ExceptionNone, ExceptionEOF, ExceptionFixedFormat, ExceptionLoop +} exception_t; + +/* Used to designate type of line read in fixed source form. 
+ */ +typedef enum eFortranLineType { + LTYPE_UNDETERMINED, + LTYPE_INVALID, + LTYPE_COMMENT, + LTYPE_CONTINUATION, + LTYPE_EOF, + LTYPE_INITIAL, + LTYPE_SHORT +} lineType; + +/* Used to specify type of keyword. + */ +typedef enum eKeywordId { + KEYWORD_NONE = -1, + KEYWORD_allocatable, + KEYWORD_assignment, + KEYWORD_automatic, + KEYWORD_block, + KEYWORD_byte, + KEYWORD_cexternal, + KEYWORD_cglobal, + KEYWORD_character, + KEYWORD_common, + KEYWORD_complex, + KEYWORD_contains, + KEYWORD_data, + KEYWORD_dimension, + KEYWORD_dllexport, + KEYWORD_dllimport, + KEYWORD_do, + KEYWORD_double, + KEYWORD_elemental, + KEYWORD_end, + KEYWORD_entry, + KEYWORD_equivalence, + KEYWORD_external, + KEYWORD_format, + KEYWORD_function, + KEYWORD_if, + KEYWORD_implicit, + KEYWORD_include, + KEYWORD_inline, + KEYWORD_integer, + KEYWORD_intent, + KEYWORD_interface, + KEYWORD_intrinsic, + KEYWORD_logical, + KEYWORD_map, + KEYWORD_module, + KEYWORD_namelist, + KEYWORD_operator, + KEYWORD_optional, + KEYWORD_parameter, + KEYWORD_pascal, + KEYWORD_pexternal, + KEYWORD_pglobal, + KEYWORD_pointer, + KEYWORD_precision, + KEYWORD_private, + KEYWORD_program, + KEYWORD_public, + KEYWORD_pure, + KEYWORD_real, + KEYWORD_record, + KEYWORD_recursive, + KEYWORD_save, + KEYWORD_select, + KEYWORD_sequence, + KEYWORD_static, + KEYWORD_stdcall, + KEYWORD_structure, + KEYWORD_subroutine, + KEYWORD_target, + KEYWORD_then, + KEYWORD_type, + KEYWORD_union, + KEYWORD_use, + KEYWORD_value, + KEYWORD_virtual, + KEYWORD_volatile, + KEYWORD_where, + KEYWORD_while +} keywordId; + +/* Used to determine whether keyword is valid for the token language and + * what its ID is. 
+ */ +typedef struct sKeywordDesc { + const char *name; + keywordId id; +} keywordDesc; + +typedef enum eTokenType { + TOKEN_UNDEFINED, + TOKEN_COMMA, + TOKEN_DOUBLE_COLON, + TOKEN_IDENTIFIER, + TOKEN_KEYWORD, + TOKEN_LABEL, + TOKEN_NUMERIC, + TOKEN_OPERATOR, + TOKEN_PAREN_CLOSE, + TOKEN_PAREN_OPEN, + TOKEN_PERCENT, + TOKEN_STATEMENT_END, + TOKEN_STRING +} tokenType; + +typedef enum eTagType { + TAG_UNDEFINED = -1, + TAG_BLOCK_DATA, + TAG_COMMON_BLOCK, + TAG_ENTRY_POINT, + TAG_FUNCTION, + TAG_INTERFACE, + TAG_COMPONENT, + TAG_LABEL, + TAG_LOCAL, + TAG_MODULE, + TAG_NAMELIST, + TAG_PROGRAM, + TAG_SUBROUTINE, + TAG_DERIVED_TYPE, + TAG_VARIABLE, + TAG_COUNT /* must be last */ +} tagType; + +typedef struct sTokenInfo { + tokenType type; + keywordId keyword; + tagType tag; + vString* string; + struct sTokenInfo *secondary; + unsigned long lineNumber; + fpos_t filePosition; +} tokenInfo; + +/* +* DATA DEFINITIONS +*/ +/* Ungetc is the one-character pushback slot used by getChar/ungetChar ('\0' means empty); FreeSourceForm selects getFreeFormChar over getFixedFormChar in getChar; ParsingString is set while parseString is reading a literal. */ +static langType Lang_fortran; +static jmp_buf Exception; +static int Ungetc; +static unsigned int Column; +static boolean FreeSourceForm; +static boolean ParsingString; +static tokenInfo *Parent; + +/* indexed by tagType */ +static kindOption FortranKinds [] = { + { TRUE, 'b', "block data", "block data"}, + { TRUE, 'c', "common", "common blocks"}, + { TRUE, 'e', "entry", "entry points"}, + { TRUE, 'f', "function", "functions"}, + { FALSE, 'i', "interface", "interface contents, generic names, and operators"}, + { TRUE, 'k', "component", "type and structure components"}, + { TRUE, 'l', "label", "labels"}, + { FALSE, 'L', "local", "local, common block, and namelist variables"}, + { TRUE, 'm', "module", "modules"}, + { TRUE, 'n', "namelist", "namelists"}, + { TRUE, 'p', "program", "programs"}, + { TRUE, 's', "subroutine", "subroutines"}, + { TRUE, 't', "type", "derived types and structures"}, + { TRUE, 'v', "variable", "program (global) and module variables"} +}; + +/* For definitions of Fortran 77 with extensions: + * 
http://www.fortran.com/fortran/F77_std/rjcnf0001.html + * http://scienide.uwaterloo.ca/MIPSpro7/007-2362-004/sgi_html/index.html + * + * For the Compaq Fortran Reference Manual: + * http://h18009.www1.hp.com/fortran/docs/lrm/dflrm.htm + */ + +static const keywordDesc FortranKeywordTable [] = { + /* keyword keyword ID */ + { "allocatable", KEYWORD_allocatable }, + { "assignment", KEYWORD_assignment }, + { "automatic", KEYWORD_automatic }, + { "block", KEYWORD_block }, + { "byte", KEYWORD_byte }, + { "cexternal", KEYWORD_cexternal }, + { "cglobal", KEYWORD_cglobal }, + { "character", KEYWORD_character }, + { "common", KEYWORD_common }, + { "complex", KEYWORD_complex }, + { "contains", KEYWORD_contains }, + { "data", KEYWORD_data }, + { "dimension", KEYWORD_dimension }, + { "dll_export", KEYWORD_dllexport }, + { "dll_import", KEYWORD_dllimport }, + { "do", KEYWORD_do }, + { "double", KEYWORD_double }, + { "elemental", KEYWORD_elemental }, + { "end", KEYWORD_end }, + { "entry", KEYWORD_entry }, + { "equivalence", KEYWORD_equivalence }, + { "external", KEYWORD_external }, + { "format", KEYWORD_format }, + { "function", KEYWORD_function }, + { "if", KEYWORD_if }, + { "implicit", KEYWORD_implicit }, + { "include", KEYWORD_include }, + { "inline", KEYWORD_inline }, + { "integer", KEYWORD_integer }, + { "intent", KEYWORD_intent }, + { "interface", KEYWORD_interface }, + { "intrinsic", KEYWORD_intrinsic }, + { "logical", KEYWORD_logical }, + { "map", KEYWORD_map }, + { "module", KEYWORD_module }, + { "namelist", KEYWORD_namelist }, + { "operator", KEYWORD_operator }, + { "optional", KEYWORD_optional }, + { "parameter", KEYWORD_parameter }, + { "pascal", KEYWORD_pascal }, + { "pexternal", KEYWORD_pexternal }, + { "pglobal", KEYWORD_pglobal }, + { "pointer", KEYWORD_pointer }, + { "precision", KEYWORD_precision }, + { "private", KEYWORD_private }, + { "program", KEYWORD_program }, + { "public", KEYWORD_public }, + { "pure", KEYWORD_pure }, + { "real", KEYWORD_real }, + { 
"record", KEYWORD_record }, + { "recursive", KEYWORD_recursive }, + { "save", KEYWORD_save }, + { "select", KEYWORD_select }, + { "sequence", KEYWORD_sequence }, + { "static", KEYWORD_static }, + { "stdcall", KEYWORD_stdcall }, + { "structure", KEYWORD_structure }, + { "subroutine", KEYWORD_subroutine }, + { "target", KEYWORD_target }, + { "then", KEYWORD_then }, + { "type", KEYWORD_type }, + { "union", KEYWORD_union }, + { "use", KEYWORD_use }, + { "value", KEYWORD_value }, + { "virtual", KEYWORD_virtual }, + { "volatile", KEYWORD_volatile }, + { "where", KEYWORD_where }, + { "while", KEYWORD_while } +}; + +static struct { + unsigned int count; + unsigned int max; + tokenInfo* list; +} Ancestors = { 0, 0, NULL }; + +/* +* FUNCTION PROTOTYPES +*/ +static void parseStructureStmt (tokenInfo *const token); +static void parseUnionStmt (tokenInfo *const token); +static void parseDerivedTypeDef (tokenInfo *const token); +static void parseFunctionSubprogram (tokenInfo *const token); +static void parseSubroutineSubprogram (tokenInfo *const token); + +/* +* FUNCTION DEFINITIONS +*/ + +static void ancestorPush (tokenInfo *const token) +{ + enum { incrementalIncrease = 10 }; + if (Ancestors.list == NULL) + { + Assert (Ancestors.max == 0); + Ancestors.count = 0; + Ancestors.max = incrementalIncrease; + Ancestors.list = xMalloc (Ancestors.max, tokenInfo); + } + else if (Ancestors.count == Ancestors.max) + { + Ancestors.max += incrementalIncrease; + Ancestors.list = xRealloc (Ancestors.list, Ancestors.max, tokenInfo); + } + Ancestors.list [Ancestors.count] = *token; + Ancestors.list [Ancestors.count].string = vStringNewCopy (token->string); + Ancestors.count++; +} + +static void ancestorPop (void) +{ + Assert (Ancestors.count > 0); + --Ancestors.count; + vStringDelete (Ancestors.list [Ancestors.count].string); + + Ancestors.list [Ancestors.count].type = TOKEN_UNDEFINED; + Ancestors.list [Ancestors.count].keyword = KEYWORD_NONE; + Ancestors.list [Ancestors.count].secondary = 
NULL; + Ancestors.list [Ancestors.count].tag = TAG_UNDEFINED; + Ancestors.list [Ancestors.count].string = NULL; + Ancestors.list [Ancestors.count].lineNumber = 0L; +} +/* Returns the innermost ancestor that is an identifier carrying a tag other than TAG_UNDEFINED or TAG_INTERFACE, or NULL when there is none. */ +static const tokenInfo* ancestorScope (void) +{ + tokenInfo *result = NULL; + unsigned int i; + for (i = Ancestors.count ; i > 0 && result == NULL ; --i) + { + tokenInfo *const token = Ancestors.list + i - 1; + if (token->type == TOKEN_IDENTIFIER && + token->tag != TAG_UNDEFINED && token->tag != TAG_INTERFACE) + result = token; + } + return result; +} +/* Returns the most recently pushed ancestor; the stack must not be empty. */ +static const tokenInfo* ancestorTop (void) +{ + Assert (Ancestors.count > 0); + return &Ancestors.list [Ancestors.count - 1]; +} + +#define ancestorCount() (Ancestors.count) +/* Pops every ancestor, frees the backing array and resets the stack to its empty state. */ +static void ancestorClear (void) +{ + while (Ancestors.count > 0) + ancestorPop (); + if (Ancestors.list != NULL) + eFree (Ancestors.list); + Ancestors.list = NULL; + Ancestors.count = 0; + Ancestors.max = 0; +} +/* TRUE when any ancestor on the stack is tagged TAG_INTERFACE. */ +static boolean insideInterface (void) +{ + boolean result = FALSE; + unsigned int i; + for (i = 0 ; i < Ancestors.count && !result ; ++i) + { + if (Ancestors.list [i].tag == TAG_INTERFACE) + result = TRUE; + } + return result; +} +/* Registers every FortranKeywordTable entry for Lang_fortran. */ +static void buildFortranKeywordHash (void) +{ + const size_t count = + sizeof (FortranKeywordTable) / sizeof (FortranKeywordTable [0]); + size_t i; + for (i = 0 ; i < count ; ++i) + { + const keywordDesc* const p = &FortranKeywordTable [i]; + addKeyword (p->name, Lang_fortran, (int) p->id); + } +} + +/* +* Tag generation functions +*/ +/* Allocates a token with an empty string, no secondary token, and the current source line number and input file position. */ +static tokenInfo *newToken (void) +{ + tokenInfo *const token = xMalloc (1, tokenInfo); + + token->type = TOKEN_UNDEFINED; + token->keyword = KEYWORD_NONE; + token->tag = TAG_UNDEFINED; + token->string = vStringNew (); + token->secondary = NULL; + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + + return token; +} +/* Duplicates `token' into a new heap token that owns its own copy of the string; the secondary token is moved to the copy (the source's pointer is cleared). */ +static tokenInfo *newTokenFrom (tokenInfo *const token) +{ + /* Allocate raw storage rather than calling newToken (): every field is overwritten by the struct copy below, so the vString that newToken () creates would be orphaned and leaked. */ + tokenInfo *result = xMalloc (1, tokenInfo); + *result = *token; + result->string = 
vStringNewCopy (token->string); + token->secondary = NULL; + return result; +} + +static void deleteToken (tokenInfo *const token) +{ + if (token != NULL) + { + vStringDelete (token->string); + deleteToken (token->secondary); + token->secondary = NULL; + eFree (token); + } +} + +static boolean isFileScope (const tagType type) +{ + return (boolean) (type == TAG_LABEL || type == TAG_LOCAL); +} + +static boolean includeTag (const tagType type) +{ + boolean include; + Assert (type != TAG_UNDEFINED); + include = FortranKinds [(int) type].enabled; + if (include && isFileScope (type)) + include = Option.include.fileScope; + return include; +} + +static void makeFortranTag (tokenInfo *const token, tagType tag) +{ + token->tag = tag; + if (includeTag (token->tag)) + { + const char *const name = vStringValue (token->string); + tagEntryInfo e; + + initTagEntry (&e, name); + + if (token->tag == TAG_COMMON_BLOCK) + e.lineNumberEntry = (boolean) (Option.locate != EX_PATTERN); + + e.lineNumber = token->lineNumber; + e.filePosition = token->filePosition; + e.isFileScope = isFileScope (token->tag); + e.kindName = FortranKinds [token->tag].name; + e.kind = FortranKinds [token->tag].letter; + e.truncateLine = (boolean) (token->tag != TAG_LABEL); + + if (ancestorCount () > 0) + { + const tokenInfo* const scope = ancestorScope (); + if (scope != NULL) + { + e.extensionFields.scope [0] = FortranKinds [scope->tag].name; + e.extensionFields.scope [1] = vStringValue (scope->string); + } + } + if (! 
insideInterface () || includeTag (TAG_INTERFACE)) + makeTagEntry (&e); + } +} + +/* +* Parsing functions +*/ +/* Consumes input through the next newline or EOF and returns that terminating character. */ +static int skipLine (void) +{ + int c; + + do + c = fileGetc (); + while (c != EOF && c != '\n'); + + return c; +} +/* Emits a TAG_LABEL tag named by `label' using a temporary token. */ +static void makeLabelTag (vString *const label) +{ + tokenInfo *token = newToken (); + token->type = TOKEN_LABEL; + vStringCopy (token->string, label); + makeFortranTag (token, TAG_LABEL); + deleteToken (token); +} +/* Classifies a fixed-form source line from its first six "margin" characters, tagging any statement label digits found there. */ +static lineType getLineType (void) +{ + vString *label = vStringNew (); + int column = 0; + lineType type = LTYPE_UNDETERMINED; + + do /* read in first 6 "margin" characters */ + { + int c = fileGetc (); + + /* 3.2.1 Comment_Line. A comment line is any line that contains + * a C or an asterisk in column 1, or contains only blank characters + * in columns 1 through 72. A comment line that contains a C or + * an asterisk in column 1 may contain any character capable of + * representation in the processor in columns 2 through 72. + */ + /* EXCEPTION! Some compilers permit '!' as a comment character here. + * + * Treat # and $ in column 1 as comment to permit preprocessor directives. + * Treat D and d in column 1 as comment for HP debug statements. + */ + if (column == 0 && strchr ("*Cc!#$Dd", c) != NULL) + type = LTYPE_COMMENT; + else if (c == '\t') /* EXCEPTION! Some compilers permit a tab here */ + { + column = 8; + type = LTYPE_INITIAL; + } + else if (column == 5) + { + /* 3.2.2 Initial_Line. An initial line is any line that is not + * a comment line and contains the character blank or the digit 0 + * in column 6. Columns 1 through 5 may contain a statement label + * (3.4), or each of the columns 1 through 5 must contain the + * character blank. + */ + if (c == ' ' || c == '0') + type = LTYPE_INITIAL; + + /* 3.2.3 Continuation_Line. 
A continuation line is any line that + * contains any character of the FORTRAN character set other than + * the character blank or the digit 0 in column 6 and contains + * only blank characters in columns 1 through 5. + */ + else if (vStringLength (label) == 0) + type = LTYPE_CONTINUATION; + else + type = LTYPE_INVALID; + } + else if (c == ' ') + ; + else if (c == EOF) + type = LTYPE_EOF; + else if (c == '\n') + type = LTYPE_SHORT; + else if (isdigit (c)) + vStringPut (label, c); + else + type = LTYPE_INVALID; + + ++column; + } while (column < 6 && type == LTYPE_UNDETERMINED); + + Assert (type != LTYPE_UNDETERMINED); + + if (vStringLength (label) > 0) + { + vStringTerminate (label); + makeLabelTag (label); + } + vStringDelete (label); + return type; +} + +static int getFixedFormChar (void) +{ + boolean newline = FALSE; + lineType type; + int c = '\0'; + + if (Column > 0) + { +#ifdef STRICT_FIXED_FORM + /* EXCEPTION! Some compilers permit more than 72 characters per line. + */ + if (Column > 71) + c = skipLine (); + else +#endif + { + c = fileGetc (); + ++Column; + } + if (c == '\n') + { + newline = TRUE; /* need to check for continuation line */ + Column = 0; + } + else if (c == '!' && ! 
ParsingString) + { + c = skipLine (); + newline = TRUE; /* need to check for continuation line */ + Column = 0; + } + else if (c == '&') /* check for free source form */ + { + const int c2 = fileGetc (); + if (c2 == '\n') + longjmp (Exception, (int) ExceptionFixedFormat); + else + fileUngetc (c2); + } + } + while (Column == 0) + { + type = getLineType (); + switch (type) + { + case LTYPE_UNDETERMINED: + case LTYPE_INVALID: + longjmp (Exception, (int) ExceptionFixedFormat); + break; + + case LTYPE_SHORT: break; + case LTYPE_COMMENT: skipLine (); break; + + case LTYPE_EOF: + Column = 6; + if (newline) + c = '\n'; + else + c = EOF; + break; + + case LTYPE_INITIAL: + if (newline) + { + c = '\n'; + Column = 6; + break; + } + /* fall through to next case */ + case LTYPE_CONTINUATION: + Column = 5; + do + { + c = fileGetc (); + ++Column; + } while (isBlank (c)); + if (c == '\n') + Column = 0; + else if (Column > 6) + { + fileUngetc (c); + c = ' '; + } + break; + + default: + Assert ("Unexpected line type" == NULL); + } + } + return c; +} + +static int skipToNextLine (void) +{ + int c = skipLine (); + if (c != EOF) + c = fileGetc (); + return c; +} + +static int getFreeFormChar (void) +{ + static boolean newline = TRUE; + boolean advanceLine = FALSE; + int c = fileGetc (); + + /* If the last nonblank, non-comment character of a FORTRAN 90 + * free-format text line is an ampersand then the next non-comment + * line is a continuation line. + */ + if (c == '&') + { + do + c = fileGetc (); + while (isspace (c) && c != '\n'); + if (c == '\n') + { + newline = TRUE; + advanceLine = TRUE; + } + else if (c == '!') + advanceLine = TRUE; + else + { + fileUngetc (c); + c = '&'; + } + } + else if (newline && (c == '!' || c == '#')) + advanceLine = TRUE; + while (advanceLine) + { + while (isspace (c)) + c = fileGetc (); + if (c == '!' 
|| (newline && c == '#')) + { + c = skipToNextLine (); + newline = TRUE; + continue; + } + if (c == '&') + c = fileGetc (); + else + advanceLine = FALSE; + } + newline = (boolean) (c == '\n'); + return c; +} + +static int getChar (void) +{ + int c; + + if (Ungetc != '\0') + { + c = Ungetc; + Ungetc = '\0'; + } + else if (FreeSourceForm) + c = getFreeFormChar (); + else + c = getFixedFormChar (); + return c; +} + +static void ungetChar (const int c) +{ + Ungetc = c; +} + +/* If a numeric is passed in 'c', this is used as the first digit of the + * numeric being parsed. + */ +static vString *parseInteger (int c) +{ + vString *string = vStringNew (); + + if (c == '-') + { + vStringPut (string, c); + c = getChar (); + } + else if (! isdigit (c)) + c = getChar (); + while (c != EOF && isdigit (c)) + { + vStringPut (string, c); + c = getChar (); + } + vStringTerminate (string); + + if (c == '_') + { + do + c = getChar (); + while (c != EOF && isalpha (c)); + } + ungetChar (c); + + return string; +} + +static vString *parseNumeric (int c) +{ + vString *string = vStringNew (); + vString *integer = parseInteger (c); + vStringCopy (string, integer); + vStringDelete (integer); + + c = getChar (); + if (c == '.') + { + integer = parseInteger ('\0'); + vStringPut (string, c); + vStringCat (string, integer); + vStringDelete (integer); + c = getChar (); + } + if (tolower (c) == 'e') + { + integer = parseInteger ('\0'); + vStringPut (string, c); + vStringCat (string, integer); + vStringDelete (integer); + } + else + ungetChar (c); + + vStringTerminate (string); + + return string; +} + +static void parseString (vString *const string, const int delimiter) +{ + const unsigned long inputLineNumber = getInputLineNumber (); + int c; + ParsingString = TRUE; + c = getChar (); + while (c != delimiter && c != '\n' && c != EOF) + { + vStringPut (string, c); + c = getChar (); + } + if (c == '\n' || c == EOF) + { + verbose ("%s: unterminated character string at line %lu\n", + getInputFileName 
(), inputLineNumber); + /* Clear the flag before either longjmp below: those exits skip the reset at the end of this function and would otherwise leave the file-static ParsingString set to TRUE. */ + ParsingString = FALSE; + if (c == EOF) + longjmp (Exception, (int) ExceptionEOF); + else if (! FreeSourceForm) + longjmp (Exception, (int) ExceptionFixedFormat); + } + vStringTerminate (string); + ParsingString = FALSE; +} + +/* Reads an identifier beginning with "firstChar" and appends it to "string"; + * the first character that is not part of the identifier is pushed back. + */ +static void parseIdentifier (vString *const string, const int firstChar) +{ + int c = firstChar; + + do + { + vStringPut (string, c); + c = getChar (); + } while (isident (c)); + + vStringTerminate (string); + ungetChar (c); /* unget non-identifier character */ +} +/* Skips blanks, then collects up to five leading digits as a statement label and tags it as TAG_LABEL; the first unconsumed character is pushed back. */ +static void checkForLabel (void) +{ + tokenInfo* token = NULL; + int length; + int c; + + do + c = getChar (); + while (isBlank (c)); + + for (length = 0 ; isdigit (c) && length < 5 ; ++length) + { + if (token == NULL) + { + token = newToken (); + token->type = TOKEN_LABEL; + } + vStringPut (token->string, c); + c = getChar (); + } + if (length > 0 && token != NULL) + { + vStringTerminate (token->string); + makeFortranTag (token, TAG_LABEL); + deleteToken (token); + } + ungetChar (c); +} +/* Reads an identifier starting with `c' into the token and classifies it as a keyword or a plain identifier. */ +static void readIdentifier (tokenInfo *const token, const int c) +{ + parseIdentifier (token->string, c); + token->keyword = analyzeToken (token->string, Lang_fortran); + if (! 
isKeyword (token, KEYWORD_NONE)) + token->type = TOKEN_KEYWORD; + else + { + token->type = TOKEN_IDENTIFIER; + if (strncmp (vStringValue (token->string), "end", 3) == 0) + { + vString *const sub = vStringNewInit (vStringValue (token->string) + 3); + const keywordId kw = analyzeToken (sub, Lang_fortran); + vStringDelete (sub); + if (kw != KEYWORD_NONE) + { + token->secondary = newToken (); + token->secondary->type = TOKEN_KEYWORD; + token->secondary->keyword = kw; + token->keyword = KEYWORD_end; + } + } + } +} + +static void readToken (tokenInfo *const token) +{ + int c; + + deleteToken (token->secondary); + token->type = TOKEN_UNDEFINED; + token->tag = TAG_UNDEFINED; + token->keyword = KEYWORD_NONE; + token->secondary = NULL; + vStringClear (token->string); + +getNextChar: + c = getChar (); + + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + + switch (c) + { + case EOF: longjmp (Exception, (int) ExceptionEOF); break; + case ' ': goto getNextChar; + case '\t': goto getNextChar; + case ',': token->type = TOKEN_COMMA; break; + case '(': token->type = TOKEN_PAREN_OPEN; break; + case ')': token->type = TOKEN_PAREN_CLOSE; break; + case '%': token->type = TOKEN_PERCENT; break; + + case '*': + case '/': + case '+': + case '-': + case '=': + case '<': + case '>': + { + const char *const operatorChars = "*/+=<>"; + do { + vStringPut (token->string, c); + c = getChar (); + } while (strchr (operatorChars, c) != NULL); + ungetChar (c); + vStringTerminate (token->string); + token->type = TOKEN_OPERATOR; + break; + } + + case '!': + if (FreeSourceForm) + { + do + c = getChar (); + while (c != '\n' && c != EOF); + } + else + { + skipLine (); + Column = 0; + } + /* fall through to newline case */ + case '\n': + token->type = TOKEN_STATEMENT_END; + if (FreeSourceForm) + checkForLabel (); + break; + + case '.': + parseIdentifier (token->string, c); + c = getChar (); + if (c == '.') + { + vStringPut (token->string, c); + vStringTerminate 
(token->string); + token->type = TOKEN_OPERATOR; + } + else + { + ungetChar (c); + token->type = TOKEN_UNDEFINED; + } + break; + + case '"': + case '\'': + parseString (token->string, c); + token->type = TOKEN_STRING; + break; + + case ';': + token->type = TOKEN_STATEMENT_END; + break; + + case ':': + c = getChar (); + if (c == ':') + token->type = TOKEN_DOUBLE_COLON; + else + { + ungetChar (c); + token->type = TOKEN_UNDEFINED; + } + break; + + default: + if (isalpha (c)) + readIdentifier (token, c); + else if (isdigit (c)) + { + vString *numeric = parseNumeric (c); + vStringCat (token->string, numeric); + vStringDelete (numeric); + token->type = TOKEN_NUMERIC; + } + else + token->type = TOKEN_UNDEFINED; + break; + } +} + +static void readSubToken (tokenInfo *const token) +{ + if (token->secondary == NULL) + { + token->secondary = newToken (); + readToken (token->secondary); + } +} + +/* +* Scanning functions +*/ + +static void skipToToken (tokenInfo *const token, tokenType type) +{ + while (! isType (token, type) && ! isType (token, TOKEN_STATEMENT_END) && + !(token->secondary != NULL && isType (token->secondary, TOKEN_STATEMENT_END))) + readToken (token); +} + +static void skipPast (tokenInfo *const token, tokenType type) +{ + skipToToken (token, type); + if (! isType (token, TOKEN_STATEMENT_END)) + readToken (token); +} + +static void skipToNextStatement (tokenInfo *const token) +{ + do + { + skipToToken (token, TOKEN_STATEMENT_END); + readToken (token); + } while (isType (token, TOKEN_STATEMENT_END)); +} + +/* skip over parenthesis enclosed contents starting at next token. + * Token is left at the first token following closing parenthesis. If an + * opening parenthesis is not found, `token' is moved to the end of the + * statement. 
+ */ +static void skipOverParens (tokenInfo *const token) +{ + int level = 0; + do { + if (isType (token, TOKEN_STATEMENT_END)) + break; + else if (isType (token, TOKEN_PAREN_OPEN)) + ++level; + else if (isType (token, TOKEN_PAREN_CLOSE)) + --level; + readToken (token); + } while (level > 0); +} + +static boolean isTypeSpec (tokenInfo *const token) +{ + boolean result; + switch (token->keyword) + { + case KEYWORD_byte: + case KEYWORD_integer: + case KEYWORD_real: + case KEYWORD_double: + case KEYWORD_complex: + case KEYWORD_character: + case KEYWORD_logical: + case KEYWORD_record: + case KEYWORD_type: + result = TRUE; + break; + default: + result = FALSE; + break; + } + return result; +} + +static boolean isSubprogramPrefix (tokenInfo *const token) +{ + boolean result; + switch (token->keyword) + { + case KEYWORD_elemental: + case KEYWORD_pure: + case KEYWORD_recursive: + case KEYWORD_stdcall: + result = TRUE; + break; + default: + result = FALSE; + break; + } + return result; +} + +/* type-spec + * is INTEGER [kind-selector] + * or REAL [kind-selector] is ( etc. 
) + * or DOUBLE PRECISION + * or COMPLEX [kind-selector] + * or CHARACTER [kind-selector] + * or LOGICAL [kind-selector] + * or TYPE ( type-name ) + * + * Note that INTEGER and REAL may be followed by "*N" where "N" is an integer + */ +static void parseTypeSpec (tokenInfo *const token) +{ + /* parse type-spec, leaving `token' at first token following type-spec */ + Assert (isTypeSpec (token)); + switch (token->keyword) + { + case KEYWORD_character: + /* skip char-selector */ + readToken (token); + if (isType (token, TOKEN_OPERATOR) && + strcmp (vStringValue (token->string), "*") == 0) + readToken (token); + if (isType (token, TOKEN_PAREN_OPEN)) + skipOverParens (token); + else if (isType (token, TOKEN_NUMERIC)) + readToken (token); + break; + + + case KEYWORD_byte: + case KEYWORD_complex: + case KEYWORD_integer: + case KEYWORD_logical: + case KEYWORD_real: + readToken (token); + if (isType (token, TOKEN_PAREN_OPEN)) + skipOverParens (token); /* skip kind-selector */ + if (isType (token, TOKEN_OPERATOR) && + strcmp (vStringValue (token->string), "*") == 0) + { + readToken (token); + readToken (token); + } + break; + + case KEYWORD_double: + readToken (token); + if (isKeyword (token, KEYWORD_complex) || + isKeyword (token, KEYWORD_precision)) + readToken (token); + else + skipToToken (token, TOKEN_STATEMENT_END); + break; + + case KEYWORD_record: + readToken (token); + if (isType (token, TOKEN_OPERATOR) && + strcmp (vStringValue (token->string), "/") == 0) + { + readToken (token); /* skip to structure name */ + readToken (token); /* skip to '/' */ + readToken (token); /* skip to variable name */ + } + break; + + case KEYWORD_type: + readToken (token); + if (isType (token, TOKEN_PAREN_OPEN)) + skipOverParens (token); /* skip type-name */ + else + parseDerivedTypeDef (token); + break; + + default: + skipToToken (token, TOKEN_STATEMENT_END); + break; + } +} + +static boolean skipStatementIfKeyword (tokenInfo *const token, keywordId keyword) +{ + boolean result = FALSE; 
+ if (isKeyword (token, keyword)) + { + result = TRUE; + skipToNextStatement (token); + } + return result; +} + +/* parse a list of qualifying specifiers, leaving `token' at first token + * following list. Examples of such specifiers are: + * [[, attr-spec] ::] + * [[, component-attr-spec-list] ::] + * + * attr-spec + * is PARAMETER + * or access-spec (is PUBLIC or PRIVATE) + * or ALLOCATABLE + * or DIMENSION ( array-spec ) + * or EXTERNAL + * or INTENT ( intent-spec ) + * or INTRINSIC + * or OPTIONAL + * or POINTER + * or SAVE + * or TARGET + * + * component-attr-spec + * is POINTER + * or DIMENSION ( component-array-spec ) + */ +static void parseQualifierSpecList (tokenInfo *const token) +{ + do + { + readToken (token); /* should be an attr-spec */ + switch (token->keyword) + { + case KEYWORD_parameter: + case KEYWORD_allocatable: + case KEYWORD_external: + case KEYWORD_intrinsic: + case KEYWORD_optional: + case KEYWORD_private: + case KEYWORD_pointer: + case KEYWORD_public: + case KEYWORD_save: + case KEYWORD_target: + readToken (token); + break; + + case KEYWORD_dimension: + case KEYWORD_intent: + readToken (token); + skipOverParens (token); + break; + + default: skipToToken (token, TOKEN_STATEMENT_END); break; + } + } while (isType (token, TOKEN_COMMA)); + if (! 
isType (token, TOKEN_DOUBLE_COLON)) + skipToToken (token, TOKEN_STATEMENT_END); +} + +static tagType variableTagType (void) +{ + tagType result = TAG_VARIABLE; + if (ancestorCount () > 0) + { + const tokenInfo* const parent = ancestorTop (); + switch (parent->tag) + { + case TAG_MODULE: result = TAG_VARIABLE; break; + case TAG_DERIVED_TYPE: result = TAG_COMPONENT; break; + case TAG_FUNCTION: result = TAG_LOCAL; break; + case TAG_SUBROUTINE: result = TAG_LOCAL; break; + default: result = TAG_VARIABLE; break; + } + } + return result; +} + +static void parseEntityDecl (tokenInfo *const token) +{ + Assert (isType (token, TOKEN_IDENTIFIER)); + makeFortranTag (token, variableTagType ()); + readToken (token); + if (isType (token, TOKEN_PAREN_OPEN)) + skipOverParens (token); + if (isType (token, TOKEN_OPERATOR) && + strcmp (vStringValue (token->string), "*") == 0) + { + readToken (token); /* read char-length */ + if (isType (token, TOKEN_PAREN_OPEN)) + skipOverParens (token); + else + readToken (token); + } + if (isType (token, TOKEN_OPERATOR)) + { + if (strcmp (vStringValue (token->string), "/") == 0) + { /* skip over initializations of structure field */ + readToken (token); + skipPast (token, TOKEN_OPERATOR); + } + else if (strcmp (vStringValue (token->string), "=") == 0) + { + while (! isType (token, TOKEN_COMMA) && + ! 
isType (token, TOKEN_STATEMENT_END)) + { + readToken (token); + if (isType (token, TOKEN_PAREN_OPEN)) + skipOverParens (token); + } + } + } + /* token left at either comma or statement end */ +} + +static void parseEntityDeclList (tokenInfo *const token) +{ + if (isType (token, TOKEN_PERCENT)) + skipToNextStatement (token); + else while (isType (token, TOKEN_IDENTIFIER) || + (isType (token, TOKEN_KEYWORD) && + !isKeyword (token, KEYWORD_function) && + !isKeyword (token, KEYWORD_subroutine))) + { + /* compilers accept keywoeds as identifiers */ + if (isType (token, TOKEN_KEYWORD)) + token->type = TOKEN_IDENTIFIER; + parseEntityDecl (token); + if (isType (token, TOKEN_COMMA)) + readToken (token); + else if (isType (token, TOKEN_STATEMENT_END)) + { + skipToNextStatement (token); + break; + } + } +} + +/* type-declaration-stmt is + * type-spec [[, attr-spec] ... ::] entity-decl-list + */ +static void parseTypeDeclarationStmt (tokenInfo *const token) +{ + Assert (isTypeSpec (token)); + parseTypeSpec (token); + if (!isType (token, TOKEN_STATEMENT_END)) /* if not end of derived type... */ + { + if (isType (token, TOKEN_COMMA)) + parseQualifierSpecList (token); + if (isType (token, TOKEN_DOUBLE_COLON)) + readToken (token); + parseEntityDeclList (token); + } + if (isType (token, TOKEN_STATEMENT_END)) + skipToNextStatement (token); +} + +/* namelist-stmt is + * NAMELIST /namelist-group-name/ namelist-group-object-list + * [[,]/[namelist-group-name]/ namelist-block-object-list] ... + * + * namelist-group-object is + * variable-name + * + * common-stmt is + * COMMON [/[common-block-name]/] common-block-object-list + * [[,]/[common-block-name]/ common-block-object-list] ... 
+ * + * common-block-object is + * variable-name [ ( explicit-shape-spec-list ) ] + */ +static void parseCommonNamelistStmt (tokenInfo *const token, tagType type) +{ + Assert (isKeyword (token, KEYWORD_common) || + isKeyword (token, KEYWORD_namelist)); + readToken (token); + do + { + if (isType (token, TOKEN_OPERATOR) && + strcmp (vStringValue (token->string), "/") == 0) + { + readToken (token); + if (isType (token, TOKEN_IDENTIFIER)) + { + makeFortranTag (token, type); + readToken (token); + } + skipPast (token, TOKEN_OPERATOR); + } + if (isType (token, TOKEN_IDENTIFIER)) + makeFortranTag (token, TAG_LOCAL); + readToken (token); + if (isType (token, TOKEN_PAREN_OPEN)) + skipOverParens (token); /* skip explicit-shape-spec-list */ + if (isType (token, TOKEN_COMMA)) + readToken (token); + } while (! isType (token, TOKEN_STATEMENT_END)); + skipToNextStatement (token); +} + +static void parseFieldDefinition (tokenInfo *const token) +{ + if (isTypeSpec (token)) + parseTypeDeclarationStmt (token); + else if (isKeyword (token, KEYWORD_structure)) + parseStructureStmt (token); + else if (isKeyword (token, KEYWORD_union)) + parseUnionStmt (token); + else + skipToNextStatement (token); +} + +static void parseMap (tokenInfo *const token) +{ + Assert (isKeyword (token, KEYWORD_map)); + skipToNextStatement (token); + while (! isKeyword (token, KEYWORD_end)) + parseFieldDefinition (token); + readSubToken (token); + /* should be at KEYWORD_map token */ + skipToNextStatement (token); +} + +/* UNION + * MAP + * [field-definition] [field-definition] ... + * END MAP + * MAP + * [field-definition] [field-definition] ... + * END MAP + * [MAP + * [field-definition] + * [field-definition] ... + * END MAP] ... + * END UNION + * * + * + * Typed data declarations (variables or arrays) in structure declarations + * have the form of normal Fortran typed data declarations. Data items with + * different types can be freely intermixed within a structure declaration. 
+ * + * Unnamed fields can be declared in a structure by specifying the pseudo + * name %FILL in place of an actual field name. You can use this mechanism to + * generate empty space in a record for purposes such as alignment. + * + * All mapped field declarations that are made within a UNION declaration + * share a common location within the containing structure. When initializing + * the fields within a UNION, the final initialization value assigned + * overlays any value previously assigned to a field definition that shares + * that field. + */ +static void parseUnionStmt (tokenInfo *const token) +{ + Assert (isKeyword (token, KEYWORD_union)); + skipToNextStatement (token); + while (isKeyword (token, KEYWORD_map)) + parseMap (token); + /* should be at KEYWORD_end token */ + readSubToken (token); + /* secondary token should be KEYWORD_end token */ + skipToNextStatement (token); +} + +/* STRUCTURE [/structure-name/] [field-names] + * [field-definition] + * [field-definition] ... + * END STRUCTURE + * + * structure-name + * identifies the structure in a subsequent RECORD statement. + * Substructures can be established within a structure by means of either + * a nested STRUCTURE declaration or a RECORD statement. + * + * field-names + * (for substructure declarations only) one or more names having the + * structure of the substructure being defined. + * + * field-definition + * can be one or more of the following: + * + * Typed data declarations, which can optionally include one or more + * data initialization values. + * + * Substructure declarations (defined by either RECORD statements or + * subsequent STRUCTURE statements). + * + * UNION declarations, which are mapped fields defined by a block of + * statements. The syntax of a UNION declaration is described below. + * + * PARAMETER statements, which do not affect the form of the + * structure. 
+ */ +static void parseStructureStmt (tokenInfo *const token) +{ + tokenInfo *name; + Assert (isKeyword (token, KEYWORD_structure)); + readToken (token); + if (isType (token, TOKEN_OPERATOR) && + strcmp (vStringValue (token->string), "/") == 0) + { /* read structure name */ + readToken (token); + if (isType (token, TOKEN_IDENTIFIER)) + makeFortranTag (token, TAG_DERIVED_TYPE); + name = newTokenFrom (token); + skipPast (token, TOKEN_OPERATOR); + } + else + { /* fake out anonymous structure */ + name = newToken (); + name->type = TOKEN_IDENTIFIER; + name->tag = TAG_DERIVED_TYPE; + vStringCopyS (name->string, "anonymous"); + } + while (isType (token, TOKEN_IDENTIFIER)) + { /* read field names */ + makeFortranTag (token, TAG_COMPONENT); + readToken (token); + if (isType (token, TOKEN_COMMA)) + readToken (token); + } + skipToNextStatement (token); + ancestorPush (name); + while (! isKeyword (token, KEYWORD_end)) + parseFieldDefinition (token); + readSubToken (token); + /* secondary token should be KEYWORD_structure token */ + skipToNextStatement (token); + ancestorPop (); + deleteToken (name); +} + +/* specification-stmt + * is access-stmt (is access-spec [[::] access-id-list) + * or allocatable-stmt (is ALLOCATABLE [::] array-name etc.) + * or common-stmt (is COMMON [ / [common-block-name] /] etc.) + * or data-stmt (is DATA data-stmt-list [[,] data-stmt-set] ...) + * or dimension-stmt (is DIMENSION [::] array-name etc.) + * or equivalence-stmt (is EQUIVALENCE equivalence-set-list) + * or external-stmt (is EXTERNAL etc.) + * or intent-stmt (is INTENT ( intent-spec ) [::] etc.) + * or instrinsic-stmt (is INTRINSIC etc.) + * or namelist-stmt (is NAMELIST / namelist-group-name / etc.) + * or optional-stmt (is OPTIONAL [::] etc.) + * or pointer-stmt (is POINTER [::] object-name etc.) + * or save-stmt (is SAVE etc.) + * or target-stmt (is TARGET [::] object-name etc.) 
+ * + * access-spec is PUBLIC or PRIVATE + */ +static boolean parseSpecificationStmt (tokenInfo *const token) +{ + boolean result = TRUE; + switch (token->keyword) + { + case KEYWORD_common: + parseCommonNamelistStmt (token, TAG_COMMON_BLOCK); + break; + + case KEYWORD_namelist: + parseCommonNamelistStmt (token, TAG_NAMELIST); + break; + + case KEYWORD_structure: + parseStructureStmt (token); + break; + + case KEYWORD_allocatable: + case KEYWORD_data: + case KEYWORD_dimension: + case KEYWORD_equivalence: + case KEYWORD_external: + case KEYWORD_intent: + case KEYWORD_intrinsic: + case KEYWORD_optional: + case KEYWORD_pointer: + case KEYWORD_private: + case KEYWORD_public: + case KEYWORD_save: + case KEYWORD_target: + skipToNextStatement (token); + break; + + default: + result = FALSE; + break; + } + return result; +} + +/* component-def-stmt is + * type-spec [[, component-attr-spec-list] ::] component-decl-list + * + * component-decl is + * component-name [ ( component-array-spec ) ] [ * char-length ] + */ +static void parseComponentDefStmt (tokenInfo *const token) +{ + Assert (isTypeSpec (token)); + parseTypeSpec (token); + if (isType (token, TOKEN_COMMA)) + parseQualifierSpecList (token); + if (isType (token, TOKEN_DOUBLE_COLON)) + readToken (token); + parseEntityDeclList (token); +} + +/* derived-type-def is + * derived-type-stmt is (TYPE [[, access-spec] ::] type-name + * [private-sequence-stmt] ... (is PRIVATE or SEQUENCE) + * component-def-stmt + * [component-def-stmt] ... + * end-type-stmt + */ +static void parseDerivedTypeDef (tokenInfo *const token) +{ + if (isType (token, TOKEN_COMMA)) + parseQualifierSpecList (token); + if (isType (token, TOKEN_DOUBLE_COLON)) + readToken (token); + if (isType (token, TOKEN_IDENTIFIER)) + makeFortranTag (token, TAG_DERIVED_TYPE); + ancestorPush (token); + skipToNextStatement (token); + if (isKeyword (token, KEYWORD_private) || + isKeyword (token, KEYWORD_sequence)) + { + skipToNextStatement (token); + } + while (! 
isKeyword (token, KEYWORD_end)) + { + if (isTypeSpec (token)) + parseComponentDefStmt (token); + else + skipToNextStatement (token); + } + readSubToken (token); + /* secondary token should be KEYWORD_type token */ + skipToToken (token, TOKEN_STATEMENT_END); + ancestorPop (); +} + +/* interface-block + * interface-stmt (is INTERFACE [generic-spec]) + * [interface-body] + * [module-procedure-stmt] ... + * end-interface-stmt (is END INTERFACE) + * + * generic-spec + * is generic-name + * or OPERATOR ( defined-operator ) + * or ASSIGNMENT ( = ) + * + * interface-body + * is function-stmt + * [specification-part] + * end-function-stmt + * or subroutine-stmt + * [specification-part] + * end-subroutine-stmt + * + * module-procedure-stmt is + * MODULE PROCEDURE procedure-name-list + */ +static void parseInterfaceBlock (tokenInfo *const token) +{ + tokenInfo *name = NULL; + Assert (isKeyword (token, KEYWORD_interface)); + readToken (token); + if (isType (token, TOKEN_IDENTIFIER)) + { + makeFortranTag (token, TAG_INTERFACE); + name = newTokenFrom (token); + } + else if (isKeyword (token, KEYWORD_assignment) || + isKeyword (token, KEYWORD_operator)) + { + readToken (token); + if (isType (token, TOKEN_PAREN_OPEN)) + readToken (token); + if (isType (token, TOKEN_OPERATOR)) + { + makeFortranTag (token, TAG_INTERFACE); + name = newTokenFrom (token); + } + } + if (name == NULL) + { + name = newToken (); + name->type = TOKEN_IDENTIFIER; + name->tag = TAG_INTERFACE; + } + ancestorPush (name); + while (! 
isKeyword (token, KEYWORD_end)) + { + switch (token->keyword) + { + case KEYWORD_function: parseFunctionSubprogram (token); break; + case KEYWORD_subroutine: parseSubroutineSubprogram (token); break; + + default: + if (isSubprogramPrefix (token)) + readToken (token); + else if (isTypeSpec (token)) + parseTypeSpec (token); + else + skipToNextStatement (token); + break; + } + } + readSubToken (token); + /* secondary token should be KEYWORD_interface token */ + skipToNextStatement (token); + ancestorPop (); + deleteToken (name); +} + +/* entry-stmt is + * ENTRY entry-name [ ( dummy-arg-list ) ] + */ +static void parseEntryStmt (tokenInfo *const token) +{ + Assert (isKeyword (token, KEYWORD_entry)); + readToken (token); + if (isType (token, TOKEN_IDENTIFIER)) + makeFortranTag (token, TAG_ENTRY_POINT); + skipToNextStatement (token); +} + +/* stmt-function-stmt is + * function-name ([dummy-arg-name-list]) = scalar-expr + */ +static boolean parseStmtFunctionStmt (tokenInfo *const token) +{ + boolean result = FALSE; + Assert (isType (token, TOKEN_IDENTIFIER)); +#if 0 /* cannot reliably parse this yet */ + makeFortranTag (token, TAG_FUNCTION); +#endif + readToken (token); + if (isType (token, TOKEN_PAREN_OPEN)) + { + skipOverParens (token); + result = (boolean) (isType (token, TOKEN_OPERATOR) && + strcmp (vStringValue (token->string), "=") == 0); + } + skipToNextStatement (token); + return result; +} + +static boolean isIgnoredDeclaration (tokenInfo *const token) +{ + boolean result; + switch (token->keyword) + { + case KEYWORD_cexternal: + case KEYWORD_cglobal: + case KEYWORD_dllexport: + case KEYWORD_dllimport: + case KEYWORD_external: + case KEYWORD_format: + case KEYWORD_include: + case KEYWORD_inline: + case KEYWORD_parameter: + case KEYWORD_pascal: + case KEYWORD_pexternal: + case KEYWORD_pglobal: + case KEYWORD_static: + case KEYWORD_value: + case KEYWORD_virtual: + case KEYWORD_volatile: + result = TRUE; + break; + + default: + result = FALSE; + break; + } + return 
result; +} + +/* declaration-construct + * [derived-type-def] + * [interface-block] + * [type-declaration-stmt] + * [specification-stmt] + * [parameter-stmt] (is PARAMETER ( named-constant-def-list ) + * [format-stmt] (is FORMAT format-specification) + * [entry-stmt] + * [stmt-function-stmt] + */ +static boolean parseDeclarationConstruct (tokenInfo *const token) +{ + boolean result = TRUE; + switch (token->keyword) + { + case KEYWORD_entry: parseEntryStmt (token); break; + case KEYWORD_interface: parseInterfaceBlock (token); break; + case KEYWORD_stdcall: readToken (token); break; + /* derived type handled by parseTypeDeclarationStmt(); */ + + case KEYWORD_automatic: + readToken (token); + if (isTypeSpec (token)) + parseTypeDeclarationStmt (token); + else + skipToNextStatement (token); + result = TRUE; + break; + + default: + if (isIgnoredDeclaration (token)) + skipToNextStatement (token); + else if (isTypeSpec (token)) + { + parseTypeDeclarationStmt (token); + result = TRUE; + } + else if (isType (token, TOKEN_IDENTIFIER)) + result = parseStmtFunctionStmt (token); + else + result = parseSpecificationStmt (token); + break; + } + return result; +} + +/* implicit-part-stmt + * is [implicit-stmt] (is IMPLICIT etc.) + * or [parameter-stmt] (is PARAMETER etc.) + * or [format-stmt] (is FORMAT etc.) + * or [entry-stmt] (is ENTRY entry-name etc.) + */ +static boolean parseImplicitPartStmt (tokenInfo *const token) +{ + boolean result = TRUE; + switch (token->keyword) + { + case KEYWORD_entry: parseEntryStmt (token); break; + + case KEYWORD_implicit: + case KEYWORD_include: + case KEYWORD_parameter: + case KEYWORD_format: + skipToNextStatement (token); + break; + + default: result = FALSE; break; + } + return result; +} + +/* specification-part is + * [use-stmt] ... (is USE module-name etc.) + * [implicit-part] (is [implicit-part-stmt] ... [implicit-stmt]) + * [declaration-construct] ... 
+ */ +static boolean parseSpecificationPart (tokenInfo *const token) +{ + boolean result = FALSE; + while (skipStatementIfKeyword (token, KEYWORD_use)) + result = TRUE; + while (parseImplicitPartStmt (token)) + result = TRUE; + while (parseDeclarationConstruct (token)) + result = TRUE; + return result; +} + +/* block-data is + * block-data-stmt (is BLOCK DATA [block-data-name] + * [specification-part] + * end-block-data-stmt (is END [BLOCK DATA [block-data-name]]) + */ +static void parseBlockData (tokenInfo *const token) +{ + Assert (isKeyword (token, KEYWORD_block)); + readToken (token); + if (isKeyword (token, KEYWORD_data)) + { + readToken (token); + if (isType (token, TOKEN_IDENTIFIER)) + makeFortranTag (token, TAG_BLOCK_DATA); + } + ancestorPush (token); + skipToNextStatement (token); + parseSpecificationPart (token); + while (! isKeyword (token, KEYWORD_end)) + skipToNextStatement (token); + readSubToken (token); + /* secondary token should be KEYWORD_NONE or KEYWORD_block token */ + skipToNextStatement (token); + ancestorPop (); +} + +/* internal-subprogram-part is + * contains-stmt (is CONTAINS) + * internal-subprogram + * [internal-subprogram] ... + * + * internal-subprogram + * is function-subprogram + * or subroutine-subprogram + */ +static void parseInternalSubprogramPart (tokenInfo *const token) +{ + boolean done = FALSE; + if (isKeyword (token, KEYWORD_contains)) + skipToNextStatement (token); + do + { + switch (token->keyword) + { + case KEYWORD_function: parseFunctionSubprogram (token); break; + case KEYWORD_subroutine: parseSubroutineSubprogram (token); break; + case KEYWORD_end: done = TRUE; break; + + default: + if (isSubprogramPrefix (token)) + readToken (token); + else if (isTypeSpec (token)) + parseTypeSpec (token); + else + readToken (token); + break; + } + } while (! 
done); +} + +/* module is + * module-stmt (is MODULE module-name) + * [specification-part] + * [module-subprogram-part] + * end-module-stmt (is END [MODULE [module-name]]) + * + * module-subprogram-part + * contains-stmt (is CONTAINS) + * module-subprogram + * [module-subprogram] ... + * + * module-subprogram + * is function-subprogram + * or subroutine-subprogram + */ +static void parseModule (tokenInfo *const token) +{ + Assert (isKeyword (token, KEYWORD_module)); + readToken (token); + if (isType (token, TOKEN_IDENTIFIER)) + makeFortranTag (token, TAG_MODULE); + ancestorPush (token); + skipToNextStatement (token); + parseSpecificationPart (token); + if (isKeyword (token, KEYWORD_contains)) + parseInternalSubprogramPart (token); + while (! isKeyword (token, KEYWORD_end)) + skipToNextStatement (token); + readSubToken (token); + /* secondary token should be KEYWORD_NONE or KEYWORD_module token */ + skipToNextStatement (token); + ancestorPop (); +} + +/* execution-part + * executable-construct + * + * executable-contstruct is + * execution-part-construct [execution-part-construct] + * + * execution-part-construct + * is executable-construct + * or format-stmt + * or data-stmt + * or entry-stmt + */ +static boolean parseExecutionPart (tokenInfo *const token) +{ + boolean result = FALSE; + boolean done = FALSE; + while (! 
done) + { + switch (token->keyword) + { + default: + if (isSubprogramPrefix (token)) + readToken (token); + else + skipToNextStatement (token); + result = TRUE; + break; + + case KEYWORD_entry: + parseEntryStmt (token); + result = TRUE; + break; + + case KEYWORD_contains: + case KEYWORD_function: + case KEYWORD_subroutine: + done = TRUE; + break; + + case KEYWORD_end: + readSubToken (token); + if (isSecondaryKeyword (token, KEYWORD_do) || + isSecondaryKeyword (token, KEYWORD_if) || + isSecondaryKeyword (token, KEYWORD_select) || + isSecondaryKeyword (token, KEYWORD_where)) + { + skipToNextStatement (token); + result = TRUE; + } + else + done = TRUE; + break; + } + } + return result; +} + +static void parseSubprogram (tokenInfo *const token, const tagType tag) +{ + Assert (isKeyword (token, KEYWORD_program) || + isKeyword (token, KEYWORD_function) || + isKeyword (token, KEYWORD_subroutine)); + readToken (token); + if (isType (token, TOKEN_IDENTIFIER)) + makeFortranTag (token, tag); + ancestorPush (token); + skipToNextStatement (token); + parseSpecificationPart (token); + parseExecutionPart (token); + if (isKeyword (token, KEYWORD_contains)) + parseInternalSubprogramPart (token); + /* should be at KEYWORD_end token */ + readSubToken (token); + /* secondary token should be one of KEYWORD_NONE, KEYWORD_program, + * KEYWORD_function, KEYWORD_function + */ + skipToNextStatement (token); + ancestorPop (); +} + + +/* function-subprogram is + * function-stmt (is [prefix] FUNCTION function-name etc.) + * [specification-part] + * [execution-part] + * [internal-subprogram-part] + * end-function-stmt (is END [FUNCTION [function-name]]) + * + * prefix + * is type-spec [RECURSIVE] + * or [RECURSIVE] type-spec + */ +static void parseFunctionSubprogram (tokenInfo *const token) +{ + parseSubprogram (token, TAG_FUNCTION); +} + +/* subroutine-subprogram is + * subroutine-stmt (is [RECURSIVE] SUBROUTINE subroutine-name etc.) 
+ * [specification-part] + * [execution-part] + * [internal-subprogram-part] + * end-subroutine-stmt (is END [SUBROUTINE [function-name]]) + */ +static void parseSubroutineSubprogram (tokenInfo *const token) +{ + parseSubprogram (token, TAG_SUBROUTINE); +} + +/* main-program is + * [program-stmt] (is PROGRAM program-name) + * [specification-part] + * [execution-part] + * [internal-subprogram-part ] + * end-program-stmt + */ +static void parseMainProgram (tokenInfo *const token) +{ + parseSubprogram (token, TAG_PROGRAM); +} + +/* program-unit + * is main-program + * or external-subprogram (is function-subprogram or subroutine-subprogram) + * or module + * or block-data + */ +static void parseProgramUnit (tokenInfo *const token) +{ + readToken (token); + do + { + if (isType (token, TOKEN_STATEMENT_END)) + readToken (token); + else switch (token->keyword) + { + case KEYWORD_block: parseBlockData (token); break; + case KEYWORD_end: skipToNextStatement (token); break; + case KEYWORD_function: parseFunctionSubprogram (token); break; + case KEYWORD_module: parseModule (token); break; + case KEYWORD_program: parseMainProgram (token); break; + case KEYWORD_subroutine: parseSubroutineSubprogram (token); break; + + default: + if (isSubprogramPrefix (token)) + readToken (token); + else + { + boolean one = parseSpecificationPart (token); + boolean two = parseExecutionPart (token); + if (! (one || two)) + readToken (token); + } + break; + } + } while (TRUE); +} + +static boolean findFortranTags (const unsigned int passCount) +{ + tokenInfo *token; + exception_t exception; + boolean retry; + + Assert (passCount < 3); + Parent = newToken (); + token = newToken (); + FreeSourceForm = (boolean) (passCount > 1); + Column = 0; + exception = (exception_t) setjmp (Exception); + if (exception == ExceptionEOF) + retry = FALSE; + else if (exception == ExceptionFixedFormat && ! 
FreeSourceForm) + { + verbose ("%s: not fixed source form; retry as free source form\n", + getInputFileName ()); + retry = TRUE; + } + else + { + parseProgramUnit (token); + retry = FALSE; + } + ancestorClear (); + deleteToken (token); + deleteToken (Parent); + + return retry; +} + +static void initialize (const langType language) +{ + Lang_fortran = language; + buildFortranKeywordHash (); +} + +extern parserDefinition* FortranParser (void) +{ + static const char *const extensions [] = { + "f", "for", "ftn", "f77", "f90", "f95", +#ifndef CASE_INSENSITIVE_FILENAMES + "F", "FOR", "FTN", "F77", "F90", "F95", +#endif + NULL + }; + parserDefinition* def = parserNew ("Fortran"); + def->kinds = FortranKinds; + def->kindCount = KIND_COUNT (FortranKinds); + def->extensions = extensions; + def->parser2 = findFortranTags; + def->initialize = initialize; + return def; +} + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/third_party/ctags/general.h b/third_party/ctags/general.h new file mode 100644 index 000000000..5bf70a43f --- /dev/null +++ b/third_party/ctags/general.h @@ -0,0 +1,59 @@ +// clang-format off +/* +* $Id: general.h 508 2007-05-03 03:20:59Z dhiebert $ +* +* Copyright (c) 1998-2003, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* Provides the general (non-ctags-specific) environment assumed by all. +*/ +#ifndef _GENERAL_H +#define _GENERAL_H +#include "third_party/ctags/config.h" + +/* Define standard error destination + */ +#ifndef errout +# define errout stderr +#endif + +/* Define regex if supported */ +#if (defined (HAVE_REGCOMP) && !defined (REGCOMP_BROKEN)) +# define HAVE_REGEX 1 +#endif + +/* This is a helpful internal feature of later versions (> 2.7) of GCC + * to prevent warnings about unused variables. 
+ */ +#if (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 7)) && !defined (__GNUG__) +# define __unused __attribute__((__unused__)) +# define __printf(s,f) __attribute__((__format__ (__printf__, s, f))) +#else +# define __unused +# define __printf(s,f) +#endif + +/* +* DATA DECLARATIONS +*/ + +#undef FALSE +#undef TRUE +#ifdef VAXC +typedef enum { FALSE, TRUE } booleanType; +typedef int boolean; +#else +# ifdef __cplusplus +typedef bool boolean; +#define FALSE false +#define TRUE true +# else +typedef enum { FALSE, TRUE } boolean; +# endif +#endif + +#endif /* _GENERAL_H */ + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/third_party/ctags/get.c b/third_party/ctags/get.c new file mode 100644 index 000000000..2d4ea0a40 --- /dev/null +++ b/third_party/ctags/get.c @@ -0,0 +1,671 @@ +// clang-format off +/* +* $Id: get.c 559 2007-06-17 03:30:09Z elliotth $ +* +* Copyright (c) 1996-2002, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains the high level source read functions (preprocessor +* directives are handled within this level). +*/ + +/* +* INCLUDE FILES +*/ +#include "third_party/ctags/general.h" /* must always come first */ + +#include "libc/mem/alg.h" +#include "libc/str/str.h" + +#include "third_party/ctags/debug.h" +#include "third_party/ctags/entry.h" +#include "third_party/ctags/get.h" +#include "third_party/ctags/options.h" +#include "third_party/ctags/read.h" +#include "third_party/ctags/vstring.h" + +/* +* MACROS +*/ +#define stringMatch(s1,s2) (strcmp (s1,s2) == 0) +#define isspacetab(c) ((c) == SPACE || (c) == TAB) + +/* +* DATA DECLARATIONS +*/ +typedef enum { COMMENT_NONE, COMMENT_C, COMMENT_CPLUS } Comment; + +enum eCppLimits { + MaxCppNestingLevel = 20, + MaxDirectiveName = 10 +}; + +/* Defines the one nesting level of a preprocessor conditional. 
+ */
+typedef struct sConditionalInfo {
+	boolean ignoreAllBranches;  /* ignoring parent conditional branch */
+	boolean singleBranch;       /* choose only one branch */
+	boolean branchChosen;       /* branch already selected */
+	boolean ignoring;           /* current ignore state */
+} conditionalInfo;
+
+/* Identifies which directive, if any, is being scanned; this is the type of
+ * the "state" member of struct sDirective below.
+ */
+enum eState {
+	DRCTV_NONE,    /* no known directive - ignore to end of line */
+	DRCTV_DEFINE,  /* "#define" encountered */
+	DRCTV_HASH,    /* initial '#' read; determine directive */
+	DRCTV_IF,      /* "#if" or "#ifdef" encountered */
+	DRCTV_PRAGMA,  /* #pragma encountered */
+	DRCTV_UNDEF    /* "#undef" encountered */
+};
+
+/* Defines the current state of the pre-processor.
+ */
+typedef struct sCppState {
+	int ungetch, ungetch2;   /* ungotten characters, if any */
+	boolean resolveRequired; /* must resolve if/else/elif/endif branch */
+	boolean hasAtLiteralStrings; /* supports @"c:\" strings */
+	struct sDirective {
+		enum eState state;       /* current directive being processed */
+		boolean accept;          /* is a directive syntactically permitted? */
+		vString * name;          /* macro name */
+		unsigned int nestLevel;  /* level 0 is not used */
+		/* one conditionalInfo slot per level, MaxCppNestingLevel slots in all */
+		conditionalInfo ifdef [MaxCppNestingLevel];
+	} directive;
+} cppState;
+
+/*
+* DATA DEFINITIONS
+*/
+
+/* Use brace formatting to detect end of block.
+ */
+static boolean BraceFormat = FALSE;
+
+/* The one preprocessor state object used by every function in this file.
+ */
+static cppState Cpp = {
+	'\0', '\0',  /* ungetch, ungetch2 */
+	FALSE,       /* resolveRequired */
+	FALSE,       /* hasAtLiteralStrings */
+	{            /* directive */
+		DRCTV_NONE,  /* state */
+		FALSE,       /* accept */
+		NULL,        /* name */
+		0,           /* nestLevel */
+		{ {FALSE,FALSE,FALSE,FALSE} }  /* ifdef array */
+	}
+};
+
+/*
+* FUNCTION DEFINITIONS
+*/
+
+/* Reports the brace-format flag last passed to cppInit ().
+ */
+extern boolean isBraceFormat (void)
+{
+	return BraceFormat;
+}
+
+/* Reports the current depth of preprocessor conditional nesting.
+ */
+extern unsigned int getDirectiveNestLevel (void)
+{
+	return Cpp.directive.nestLevel;
+}
+
+/* Resets the preprocessor state ahead of reading a file. "state" becomes the
+ * brace-format flag; "hasAtLiteralStrings" is stored for cppGetc (). The
+ * directive name buffer is created on first use and merely emptied afterwards.
+ */
+extern void cppInit (const boolean state, const boolean hasAtLiteralStrings)
+{
+	conditionalInfo *const outermost = &Cpp.directive.ifdef [0];
+
+	BraceFormat = state;
+
+	/* nothing pushed back, nothing pending */
+	Cpp.ungetch = '\0';
+	Cpp.ungetch2 = '\0';
+	Cpp.resolveRequired = FALSE;
+	Cpp.hasAtLiteralStrings = hasAtLiteralStrings;
+
+	/* no directive in progress; one may start on the first line */
+	Cpp.directive.state = DRCTV_NONE;
+	Cpp.directive.accept = TRUE;
+	Cpp.directive.nestLevel = 0;
+
+	outermost->ignoreAllBranches = FALSE;
+	outermost->singleBranch = FALSE;
+	outermost->branchChosen = FALSE;
+	outermost->ignoring = FALSE;
+
+	if (Cpp.directive.name != NULL)
+		vStringClear (Cpp.directive.name);
+	else
+		Cpp.directive.name = vStringNew ();
+}
+
+/* Releases the directive name buffer, if there is one.
+ */
+extern void cppTerminate (void)
+{
+	if (Cpp.directive.name == NULL)
+		return;
+
+	vStringDelete (Cpp.directive.name);
+	Cpp.directive.name = NULL;
+}
+
+/* Marks that a statement has begun, so that conditional branches met from
+ * here on must be resolved.
+ */
+extern void cppBeginStatement (void)
+{
+	Cpp.resolveRequired = TRUE;
+}
+
+/* Marks that the statement in progress has ended.
+ */
+extern void cppEndStatement (void)
+{
+	Cpp.resolveRequired = FALSE;
+}
+
+/*
+* Scanning functions
+*
+* This section handles preprocessor directives. It strips out all
+* directives and may emit a tag for #define directives.
+*/
+
+/* Pushes "c" back so that the next cppGetc () returns it. There are two
+ * slots: the previously pushed character moves to the second one, which must
+ * be empty on entry.
+ */
+extern void cppUngetc (const int c)
+{
+	Assert (Cpp.ungetch2 == '\0');
+	Cpp.ungetch2 = Cpp.ungetch;
+	Cpp.ungetch = c;
+}
+
+/* Reads a directive, whose first character is given by "c", into "name".
+ */
+static boolean readDirective (int c, char *const name, unsigned int maxLength)
+{
+	unsigned int i;
+
+	/* "c" supplies the first character; every later one is read from the
+	 * file until a non-letter (or EOF) turns up, which is pushed back.
+	 */
+	for (i = 0 ; i < maxLength - 1 ; ++i)
+	{
+		if (i > 0)
+		{
+			c = fileGetc ();
+			if (c == EOF || ! isalpha (c))
+			{
+				fileUngetc (c);
+				break;
+			}
+		}
+		name [i] = c;
+	}
+	name [i] = '\0'; /* null terminate */
+
+	/* TRUE when the last character examined was a space or a tab */
+	return (boolean) isspacetab (c);
+}
+
+/* Reads an identifier, whose first character is given by "c", into "name".
+ * The first character that is not part of the identifier is pushed back.
+ */
+static void readIdentifier (int c, vString *const name)
+{
+	vStringClear (name);
+	do
+	{
+		vStringPut (name, c);
+	} while (c = fileGetc (), (c != EOF && isident (c)));
+	fileUngetc (c);
+	vStringTerminate (name);
+}
+
+/* Returns the conditional record for the current nesting level.
+ */
+static conditionalInfo *currentConditional (void)
+{
+	return &Cpp.directive.ifdef [Cpp.directive.nestLevel];
+}
+
+/* Returns the ignore flag of the current nesting level.
+ */
+static boolean isIgnore (void)
+{
+	return Cpp.directive.ifdef [Cpp.directive.nestLevel].ignoring;
+}
+
+/* Stores "ignore" as the ignore flag of the current nesting level and
+ * returns the value stored.
+ */
+static boolean setIgnore (const boolean ignore)
+{
+	return Cpp.directive.ifdef [Cpp.directive.nestLevel].ignoring = ignore;
+}
+
+/* Decides whether the branch introduced by an #elif or #else should be
+ * ignored. May set singleBranch on the current level as a side effect.
+ */
+static boolean isIgnoreBranch (void)
+{
+	conditionalInfo *const ifdef = currentConditional ();
+
+	/* Force a single branch if an incomplete statement is discovered
+	 * en route. This may have allowed earlier branches containing complete
+	 * statements to be followed, but we must follow no further branches.
+	 */
+	if (Cpp.resolveRequired && ! BraceFormat)
+		ifdef->singleBranch = TRUE;
+
+	/* We will ignore this branch in the following cases:
+	 *
+	 * 1. We are ignoring all branches (conditional was within an ignored
+	 * branch of the parent conditional)
+	 * 2. A branch has already been chosen and either of:
+	 * a. A statement was incomplete upon entering the conditional
+	 * b. A statement is incomplete upon encountering a branch
+	 */
+	return (boolean) (ifdef->ignoreAllBranches ||
+			(ifdef->branchChosen && ifdef->singleBranch));
+}
+
+/* Records on the current level whether a branch counts as chosen. Does
+ * nothing when brace formatting is in use.
+ */
+static void chooseBranch (void)
+{
+	if (! BraceFormat)
+	{
+		conditionalInfo *const ifdef = currentConditional ();
+
+		ifdef->branchChosen = (boolean) (ifdef->singleBranch ||
+				Cpp.resolveRequired);
+	}
+}
+
+/* Pushes one nesting level for an #if directive, indicating whether or not
+ * the branch should be ignored and whether a branch has already been chosen.
+ * When the nesting limit has been reached no level is pushed and FALSE is
+ * returned.
+ */
+static boolean pushConditional (const boolean firstBranchChosen)
+{
+	const boolean ignoreAllBranches = isIgnore (); /* current ignore */
+	boolean ignoreBranch = FALSE;
+
+	if (Cpp.directive.nestLevel < (unsigned int) MaxCppNestingLevel - 1)
+	{
+		conditionalInfo *ifdef;
+
+		++Cpp.directive.nestLevel;
+		ifdef = currentConditional ();
+
+		/* We take a snapshot of whether there is an incomplete statement in
+		 * progress upon encountering the preprocessor conditional. If so,
+		 * then we will flag that only a single branch of the conditional
+		 * should be followed.
+		 */
+		ifdef->ignoreAllBranches = ignoreAllBranches;
+		ifdef->singleBranch = Cpp.resolveRequired;
+		ifdef->branchChosen = firstBranchChosen;
+		ifdef->ignoring = (boolean) (ignoreAllBranches || (
+				! firstBranchChosen && ! BraceFormat &&
+				(ifdef->singleBranch || !Option.if0)));
+		ignoreBranch = ifdef->ignoring;
+	}
+	return ignoreBranch;
+}
+
+/* Pops one nesting level for an #endif directive and returns the ignore
+ * state of the level that becomes current. Level zero is never popped.
+ */
+static boolean popConditional (void)
+{
+	if (Cpp.directive.nestLevel > 0)
+		--Cpp.directive.nestLevel;
+
+	return isIgnore ();
+}
+
+/* Emits a "macro" tag (kind 'd') for "name", provided define tags are being
+ * included and, for a name found outside a header file, file-scope tags are
+ * enabled.
+ */
+static void makeDefineTag (const char *const name)
+{
+	const boolean isFileScope = (boolean) (! isHeaderFile ());
+
+	if (includingDefineTags () &&
+		(! isFileScope || Option.include.fileScope))
+	{
+		tagEntryInfo e;
+		initTagEntry (&e, name);
+		e.lineNumberEntry = (boolean) (Option.locate != EX_PATTERN);
+		e.isFileScope = isFileScope;
+		e.truncateLine = TRUE;
+		e.kindName = "macro";
+		e.kind = 'd';
+		makeTagEntry (&e);
+	}
+}
+
+/* Handles the character following "#define" (handleDirective () also routes
+ * "#undef" here): reads the macro name that starts with "c" and tags it
+ * unless the current branch is being ignored.
+ */
+static void directiveDefine (const int c)
+{
+	if (isident1 (c))
+	{
+		readIdentifier (c, Cpp.directive.name);
+		if (! isIgnore ())
+			makeDefineTag (vStringValue (Cpp.directive.name));
+	}
+	Cpp.directive.state = DRCTV_NONE;
+}
+
+/* Handles the character following "#pragma". Only "#pragma weak NAME" has
+ * any effect: NAME is tagged as a macro.
+ */
+static void directivePragma (int c)
+{
+	if (isident1 (c))
+	{
+		readIdentifier (c, Cpp.directive.name);
+		if (stringMatch (vStringValue (Cpp.directive.name), "weak"))
+		{
+			/* generate macro tag for weak name */
+			do
+			{
+				c = fileGetc ();
+			} while (c == SPACE);
+			if (isident1 (c))
+			{
+				readIdentifier (c, Cpp.directive.name);
+				makeDefineTag (vStringValue (Cpp.directive.name));
+			}
+		}
+	}
+	Cpp.directive.state = DRCTV_NONE;
+}
+
+/* Handles the first character "c" of the condition of an "#if..." directive.
+ * The first branch counts as chosen unless that character is '0'. Returns
+ * whether the branch is to be ignored.
+ */
+static boolean directiveIf (const int c)
+{
+	DebugStatement ( const boolean ignore0 = isIgnore (); )
+	const boolean ignore = pushConditional ((boolean) (c != '0'));
+
+	Cpp.directive.state = DRCTV_NONE;
+	DebugStatement ( debugCppNest (TRUE, Cpp.directive.nestLevel);
+			if (ignore != ignore0) debugCppIgnore (ignore); )
+
+	return ignore;
+}
+
+/* Reads the directive name that follows '#' (its first character is "c") and
+ * selects the state for handling the rest of the line. "elif", "else" and
+ * "endif" are acted on here; the value returned is the ignore state they
+ * produce, and FALSE for every other directive.
+ */
+static boolean directiveHash (const int c)
+{
+	boolean ignore = FALSE;
+	char directive [MaxDirectiveName];
+	DebugStatement ( const boolean ignore0 = isIgnore (); )
+
+	readDirective (c, directive, MaxDirectiveName);
+	if (stringMatch (directive, "define"))
+		Cpp.directive.state = DRCTV_DEFINE;
+	else if (stringMatch (directive, "undef"))
+		Cpp.directive.state = DRCTV_UNDEF;
+	/* any name beginning with "if": "if", "ifdef", "ifndef" */
+	else if (strncmp (directive, "if", (size_t) 2) == 0)
+		Cpp.directive.state = DRCTV_IF;
+	else if (stringMatch (directive, "elif") ||
+			stringMatch (directive, "else"))
+	{
+		ignore = setIgnore (isIgnoreBranch ());
+		if (! ignore && stringMatch (directive, "else"))
+			chooseBranch ();
+		Cpp.directive.state = DRCTV_NONE;
+		DebugStatement ( if (ignore != ignore0) debugCppIgnore (ignore); )
+	}
+	else if (stringMatch (directive, "endif"))
+	{
+		DebugStatement ( debugCppNest (FALSE, Cpp.directive.nestLevel); )
+		ignore = popConditional ();
+		Cpp.directive.state = DRCTV_NONE;
+		DebugStatement ( if (ignore != ignore0) debugCppIgnore (ignore); )
+	}
+	else if (stringMatch (directive, "pragma"))
+		Cpp.directive.state = DRCTV_PRAGMA;
+	else
+		Cpp.directive.state = DRCTV_NONE;
+
+	return ignore;
+}
+
+/* Handles a pre-processor directive whose first character is given by "c".
+ * Dispatches on the current directive state and returns the resulting
+ * ignore state.
+ */
+static boolean handleDirective (const int c)
+{
+	boolean ignore = isIgnore ();
+
+	switch (Cpp.directive.state)
+	{
+		case DRCTV_NONE: ignore = isIgnore (); break;
+		case DRCTV_DEFINE: directiveDefine (c); break;
+		case DRCTV_HASH: ignore = directiveHash (c); break;
+		case DRCTV_IF: ignore = directiveIf (c); break;
+		case DRCTV_PRAGMA: directivePragma (c); break;
+		case DRCTV_UNDEF: directiveDefine (c); break;
+	}
+	return ignore;
+}
+
+/* Called upon reading of a slash ('/') characters, determines whether a
+ * comment is encountered, and its type. The character after the slash is
+ * consumed when it opens a comment and pushed back otherwise.
+ */
+static Comment isComment (void)
+{
+	Comment comment;
+	const int next = fileGetc ();
+
+	if (next == '*')
+		comment = COMMENT_C;
+	else if (next == '/')
+		comment = COMMENT_CPLUS;
+	else
+	{
+		fileUngetc (next);
+		comment = COMMENT_NONE;
+	}
+	return comment;
+}
+
+/* Skips over a C style comment. According to ANSI specification a comment
+ * is treated as white space, so we perform this substitution. Returns SPACE
+ * once the closing delimiter has been read, or EOF if the input ends first.
+ */
+int skipOverCComment (void)
+{
+	int c = fileGetc ();
+
+	while (c != EOF)
+	{
+		if (c != '*')
+			c = fileGetc ();
+		else
+		{
+			const int next = fileGetc ();
+
+			if (next != '/')
+				c = next;
+			else
+			{
+				c = SPACE; /* replace comment with space */
+				break;
+			}
+		}
+	}
+	return c;
+}
+
+/* Skips over a C++ style comment.
+ */
+static int skipOverCplusComment (void)
+{
+	/* Returns the NEWLINE that ends the comment, or EOF. */
+	for (;;)
+	{
+		const int ch = fileGetc ();
+
+		if (ch == EOF || ch == NEWLINE)
+			return ch;
+		if (ch == BACKSLASH)
+			fileGetc (); /* the character after a backslash is discarded */
+	}
+}
+
+/* Consumes input up to and including the closing double quote (or up to
+ * EOF) and returns the symbol that stands for any string. Unless
+ * "ignoreBackslash" is set, a backslash hides the character that follows it.
+ */
+static int skipToEndOfString (boolean ignoreBackslash)
+{
+	for (;;)
+	{
+		const int ch = fileGetc ();
+
+		if (ch == EOF)
+			break;
+		if (ch == BACKSLASH && ! ignoreBackslash)
+			fileGetc (); /* escaped character: discard it */
+		else if (ch == DOUBLE_QUOTE)
+			break;
+	}
+	return STRING_SYMBOL; /* symbolic representation of string */
+}
+
+/* Consumes the remainder of a character constant and returns the symbol that
+ * stands for any character. Also copes with Vera numbers that carry a base
+ * specifier (ie. 'b1010): once the first character is one of d, h, o or b
+ * (either case), scanning stops at the first non-alphanumeric character,
+ * which is pushed back. A newline likewise ends the scan and is pushed back.
+ */
+static int skipToEndOfChar (void)
+{
+	int seen = 0;
+	int veraBase = '\0';
+	boolean finished = FALSE;
+
+	while (! finished)
+	{
+		const int ch = fileGetc ();
+
+		if (ch == EOF)
+			break;
+		++seen;
+
+		if (ch == BACKSLASH)
+			fileGetc (); /* escaped character: discard it */
+		else if (ch == SINGLE_QUOTE)
+			finished = TRUE;
+		else if (ch == NEWLINE)
+		{
+			fileUngetc (ch);
+			finished = TRUE;
+		}
+		else if (seen == 1 && strchr ("DHOB", toupper (ch)) != NULL)
+			veraBase = ch;
+		else if (veraBase != '\0' && ! isalnum (ch))
+		{
+			fileUngetc (ch);
+			finished = TRUE;
+		}
+	}
+	return CHAR_SYMBOL; /* symbolic representation of character */
+}
+
+/* This function returns the next character, stripping out comments,
+ * C pre-processor directives, and the contents of single and double
+ * quoted strings. In short, strip anything which places a burden upon
+ * the tokenizer.
+ */
+extern int cppGetc (void)
+{
+	boolean directive = FALSE;
+	boolean ignore = FALSE;
+	int c;
+
+	/* Characters pushed back with cppUngetc () are handed out first, as is. */
+	if (Cpp.ungetch != '\0')
+	{
+		c = Cpp.ungetch;
+		Cpp.ungetch = Cpp.ungetch2;
+		Cpp.ungetch2 = '\0';
+		return c; /* return here to avoid re-calling debugPutc () */
+	}
+	else do
+	{
+		/* Keep reading for as long as we are inside a directive or inside a
+		 * conditional branch that is being ignored.
+		 */
+		c = fileGetc ();
+process:
+		switch (c)
+		{
+			case EOF:
+				ignore = FALSE;
+				directive = FALSE;
+				break;
+
+			case TAB:
+			case SPACE:
+				break; /* ignore most white space */
+
+			case NEWLINE:
+				if (directive && ! ignore)
+					directive = FALSE;
+				Cpp.directive.accept = TRUE;
+				break;
+
+			case DOUBLE_QUOTE:
+				Cpp.directive.accept = FALSE;
+				c = skipToEndOfString (FALSE);
+				break;
+
+			case '#':
+				if (Cpp.directive.accept)
+				{
+					directive = TRUE;
+					Cpp.directive.state = DRCTV_HASH;
+					Cpp.directive.accept = FALSE;
+				}
+				break;
+
+			case SINGLE_QUOTE:
+				Cpp.directive.accept = FALSE;
+				c = skipToEndOfChar ();
+				break;
+
+			case '/':
+			{
+				const Comment comment = isComment ();
+
+				if (comment == COMMENT_C)
+					c = skipOverCComment ();
+				else if (comment == COMMENT_CPLUS)
+				{
+					c = skipOverCplusComment ();
+					if (c == NEWLINE)
+						fileUngetc (c);
+				}
+				else
+					Cpp.directive.accept = FALSE;
+				break;
+			}
+
+			case BACKSLASH:
+			{
+				int next = fileGetc ();
+
+				if (next == NEWLINE)
+					continue;
+				else if (next == '?')
+					cppUngetc (next);
+				else
+					fileUngetc (next);
+				break;
+			}
+
+			case '?':
+			{
+				/* trigraphs: "??x" stands for a single character */
+				int next = fileGetc ();
+				if (next != '?')
+					fileUngetc (next);
+				else
+				{
+					next = fileGetc ();
+					switch (next)
+					{
+						case '(': c = '['; break;
+						case ')': c = ']'; break;
+						case '<': c = '{'; break;
+						case '>': c = '}'; break;
+						case '/': c = BACKSLASH; goto process;
+						case '!': c = '|'; break;
+						case SINGLE_QUOTE: c = '^'; break;
+						case '-': c = '~'; break;
+						case '=': c = '#'; goto process;
+						default:
+							fileUngetc (next);
+							cppUngetc ('?');
+							break;
+					}
+				}
+			} break;
+
+			default:
+				if (c == '@' && Cpp.hasAtLiteralStrings)
+				{
+					int next = fileGetc ();
+					if (next == DOUBLE_QUOTE)
+					{
+						Cpp.directive.accept = FALSE;
+						c = skipToEndOfString (TRUE);
+						break;
+					}
+					else
+					{
+						/* Not an @"..." literal: hand the character back,
+						 * otherwise whatever follows a lone '@' is lost.
+						 */
+						fileUngetc (next);
+					}
+				}
+				Cpp.directive.accept = FALSE;
+				if (directive)
+					ignore = handleDirective (c);
+				break;
+		}
+	} while (directive || ignore);
+
+	DebugStatement ( debugPutc (DEBUG_CPP, c); )
+	DebugStatement ( if (c == NEWLINE)
+			debugPrintf (DEBUG_CPP, "%6ld: ", getInputLineNumber () + 1); )
+
+	return c;
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/third_party/ctags/get.h b/third_party/ctags/get.h
new file mode 100644
index 000000000..9b09d9c70
--- /dev/null
+++ b/third_party/ctags/get.h
@@ -0,0 +1,51 @@
+// clang-format off
+/*
+* $Id: get.h 525 2007-05-28 01:50:41Z elliotth $
+*
+* Copyright (c) 1998-2002, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* External interface to get.c
+*/
+#ifndef _GET_H
+#define _GET_H
+
+/*
+* INCLUDE FILES
+*/
+#include "third_party/ctags/general.h" /* must always come first */
+
+#include "third_party/ctags/ctags.h" /* to define langType */
+
+/*
+* MACROS
+*/
+/* Is the character valid as a character of a C identifier?
+ * VMS allows '$' in identifiers.
+ */
+#define isident(c) (isalnum(c) || (c) == '_' || (c) == '$')
+
+/* Is the character valid as the first character of a C identifier?
+ * C++ allows '~' in destructors.
+ * VMS allows '$' in identifiers.
+ */ +#define isident1(c) (isalpha(c) || (c) == '_' || (c) == '~' || (c) == '$') + +/* +* FUNCTION PROTOTYPES +*/ +extern boolean isBraceFormat (void); +extern unsigned int getDirectiveNestLevel (void); +extern void cppInit (const boolean state, const boolean hasAtLiteralStrings); +extern void cppTerminate (void); +extern void cppBeginStatement (void); +extern void cppEndStatement (void); +extern void cppUngetc (const int c); +extern int cppGetc (void); +extern int skipOverCComment (void); + +#endif /* _GET_H */ + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/third_party/ctags/go.c b/third_party/ctags/go.c new file mode 100644 index 000000000..f3ee35050 --- /dev/null +++ b/third_party/ctags/go.c @@ -0,0 +1,671 @@ +// clang-format off +/* +* INCLUDE FILES +*/ +#include "third_party/ctags/general.h" /* must always come first */ +#include "libc/runtime/runtime.h" + +#include "third_party/ctags/debug.h" +#include "third_party/ctags/entry.h" +#include "third_party/ctags/keyword.h" +#include "third_party/ctags/read.h" +#include "third_party/ctags/main.h" +#include "third_party/ctags/routines.h" +#include "third_party/ctags/vstring.h" +#include "third_party/ctags/options.h" + +/* + * MACROS + */ +#define isType(token,t) (boolean) ((token)->type == (t)) +#define isKeyword(token,k) (boolean) ((token)->keyword == (k)) + +/* + * DATA DECLARATIONS + */ + +typedef enum eException { ExceptionNone, ExceptionEOF } exception_t; + +typedef enum eKeywordId { + KEYWORD_NONE = -1, + KEYWORD_package, + KEYWORD_import, + KEYWORD_const, + KEYWORD_type, + KEYWORD_var, + KEYWORD_func, + KEYWORD_struct, + KEYWORD_interface, + KEYWORD_map, + KEYWORD_chan +} keywordId; + +/* Used to determine whether keyword is valid for the current language and + * what its ID is. 
+ */
+typedef struct sKeywordDesc {
+	const char *name;
+	keywordId id;
+} keywordDesc;
+
+typedef enum eTokenType {
+	TOKEN_NONE = -1,
+	TOKEN_CHARACTER,
+	// Don't need TOKEN_FORWARD_SLASH
+	TOKEN_FORWARD_SLASH,
+	TOKEN_KEYWORD,
+	TOKEN_IDENTIFIER,
+	TOKEN_STRING,
+	TOKEN_OPEN_PAREN,
+	TOKEN_CLOSE_PAREN,
+	TOKEN_OPEN_CURLY,
+	TOKEN_CLOSE_CURLY,
+	TOKEN_OPEN_SQUARE,
+	TOKEN_CLOSE_SQUARE,
+	TOKEN_SEMICOLON,
+	TOKEN_STAR,
+	TOKEN_LEFT_ARROW,
+	TOKEN_DOT,
+	TOKEN_COMMA
+} tokenType;
+
+typedef struct sTokenInfo {
+	tokenType type;
+	keywordId keyword;
+	vString *string; /* the name of the token */
+	unsigned long lineNumber; /* line number of tag */
+	fpos_t filePosition; /* file position of line containing name */
+} tokenInfo;
+
+/*
+* DATA DEFINITIONS
+*/
+
+static int Lang_go;
+static jmp_buf Exception; /* readToken () jumps here at end of input */
+static vString *scope; /* package name used to qualify tags, or NULL */
+
+/* Indexes into GoKinds; the two must be kept in the same order. */
+typedef enum {
+	GOTAG_UNDEFINED = -1,
+	GOTAG_PACKAGE,
+	GOTAG_FUNCTION,
+	GOTAG_CONST,
+	GOTAG_TYPE,
+	GOTAG_VAR,
+} goKind;
+
+static kindOption GoKinds[] = {
+	{TRUE, 'p', "package", "packages"},
+	{TRUE, 'f', "func", "functions"},
+	{TRUE, 'c', "const", "constants"},
+	{TRUE, 't', "type", "types"},
+	{TRUE, 'v', "var", "variables"}
+};
+
+static keywordDesc GoKeywordTable[] = {
+	{"package", KEYWORD_package},
+	{"import", KEYWORD_import},
+	{"const", KEYWORD_const},
+	{"type", KEYWORD_type},
+	{"var", KEYWORD_var},
+	{"func", KEYWORD_func},
+	{"struct", KEYWORD_struct},
+	{"interface", KEYWORD_interface},
+	{"map", KEYWORD_map},
+	{"chan", KEYWORD_chan}
+};
+
+/*
+* FUNCTION DEFINITIONS
+*/
+
+// XXX UTF-8
+// Tells whether "c" may be part of an identifier. Every byte with the high
+// bit set (0x80 and up) is accepted so that UTF-8 encoded names are kept in
+// one piece; 0x80 itself is a valid UTF-8 continuation byte.
+static boolean isIdentChar (const int c)
+{
+	return (boolean)
+		(isalpha (c) || isdigit (c) || c == '$' ||
+		 c == '@' || c == '_' || c == '#' || c >= 128);
+}
+
+// Parser start-up hook: remembers the language id and registers every entry
+// of GoKeywordTable for it.
+static void initialize (const langType language)
+{
+	size_t i;
+	const size_t count =
+		sizeof (GoKeywordTable) / sizeof (GoKeywordTable[0]);
+	Lang_go = language;
+	for (i = 0; i < count; ++i)
+	{
+		const keywordDesc *const p = &GoKeywordTable[i];
+		addKeyword (p->name, language, (int) p->id);
+	}
+}
+
+// Allocates a token with an empty string, stamped with the current source
+// line and file position. Release it with deleteToken ().
+static tokenInfo *newToken (void)
+{
+	tokenInfo *const token = xMalloc (1, tokenInfo);
+	token->type = TOKEN_NONE;
+	token->keyword = KEYWORD_NONE;
+	token->string = vStringNew ();
+	token->lineNumber = getSourceLineNumber ();
+	token->filePosition = getInputFilePosition ();
+	return token;
+}
+
+// Frees a token and its string; a NULL token is accepted.
+static void deleteToken (tokenInfo * const token)
+{
+	if (token != NULL)
+	{
+		vStringDelete (token->string);
+		eFree (token);
+	}
+}
+
+/*
+ * Parsing functions
+ */
+
+// Appends to "string" the text of a literal up to, but not including, the
+// closing "delimiter". A backslash makes the following character literal,
+// except inside back-quoted (raw) strings. Stops quietly at end of input.
+static void parseString (vString *const string, const int delimiter)
+{
+	boolean end = FALSE;
+	while (!end)
+	{
+		int c = fileGetc ();
+		if (c == EOF)
+			end = TRUE;
+		else if (c == '\\' && delimiter != '`')
+		{
+			c = fileGetc (); /* This maybe a ' or ". */
+			if (c == EOF)
+				end = TRUE; /* input ended after the backslash */
+			else
+				vStringPut (string, c);
+		}
+		else if (c == delimiter)
+			end = TRUE;
+		else
+			vStringPut (string, c);
+	}
+	vStringTerminate (string);
+}
+
+// Appends "firstChar" and every identifier character that follows it to
+// "string"; the first character that does not belong is pushed back.
+static void parseIdentifier (vString *const string, const int firstChar)
+{
+	int c = firstChar;
+	//Assert (isIdentChar (c));
+	do
+	{
+		vStringPut (string, c);
+		c = fileGetc ();
+	} while (isIdentChar (c));
+	vStringTerminate (string);
+	fileUngetc (c); /* always unget, LF might add a semicolon */
+}
+
+// Reads the next token into "token". Comments are skipped. A newline that
+// follows an identifier, a string or a closing bracket is reported as
+// TOKEN_SEMICOLON. At end of input this does not return: it longjmp ()s to
+// Exception with ExceptionEOF.
+static void readToken (tokenInfo *const token)
+{
+	int c;
+	/* type of the token returned by the previous call */
+	static tokenType lastTokenType = TOKEN_NONE;
+
+	token->type = TOKEN_NONE;
+	token->keyword = KEYWORD_NONE;
+	vStringClear (token->string);
+
+getNextChar:
+	do
+	{
+		c = fileGetc ();
+		token->lineNumber = getSourceLineNumber ();
+		token->filePosition = getInputFilePosition ();
+		if (c == '\n' && (lastTokenType == TOKEN_IDENTIFIER ||
+				lastTokenType == TOKEN_STRING ||
+				lastTokenType == TOKEN_CLOSE_PAREN ||
+				lastTokenType == TOKEN_CLOSE_CURLY ||
+				lastTokenType == TOKEN_CLOSE_SQUARE))
+		{
+			token->type = TOKEN_SEMICOLON;
+			goto done;
+		}
+	}
+	while (c == '\t' || c == ' ' || c == '\r' || c == '\n');
+
+	switch (c)
+	{
+		case EOF:
+			longjmp (Exception, (int)ExceptionEOF);
+			break;
+
+		case '/':
+			{
+				boolean hasNewline = FALSE;
+				int d = fileGetc ();
+				switch (d)
+				{
+					case '/':
+						fileSkipToCharacter ('\n');
+						/* Line comments start with the
+						 * character sequence // and
+						 * continue through the next
+						 * newline. A line comment acts
+						 * like a newline. */
+						fileUngetc ('\n');
+						goto getNextChar;
+					case '*':
+						do
+						{
+							int d;
+							do
+							{
+								d = fileGetc ();
+								if (d == '\n')
+								{
+									hasNewline = TRUE;
+								}
+							} while (d != EOF && d != '*');
+
+							c = fileGetc ();
+							if (c == '/')
+								break;
+							else
+								fileUngetc (c);
+						} while (c != EOF && c != '\0');
+
+						/* the comment stands for a newline if it spanned
+						 * one, and for a space otherwise */
+						fileUngetc (hasNewline ? '\n' : ' ');
+						goto getNextChar;
+					default:
+						token->type = TOKEN_FORWARD_SLASH;
+						fileUngetc (d);
+						break;
+				}
+			}
+			break;
+
+		case '"':
+		case '\'':
+		case '`':
+			token->type = TOKEN_STRING;
+			parseString (token->string, c);
+			token->lineNumber = getSourceLineNumber ();
+			token->filePosition = getInputFilePosition ();
+			break;
+
+		case '<':
+			{
+				int d = fileGetc ();
+				if (d == '-')
+				{
+					token->type = TOKEN_LEFT_ARROW;
+					break;
+				}
+				else
+					goto getNextChar;
+			}
+
+		case '(':
+			token->type = TOKEN_OPEN_PAREN;
+			break;
+
+		case ')':
+			token->type = TOKEN_CLOSE_PAREN;
+			break;
+
+		case '{':
+			token->type = TOKEN_OPEN_CURLY;
+			break;
+
+		case '}':
+			token->type = TOKEN_CLOSE_CURLY;
+			break;
+
+		case '[':
+			token->type = TOKEN_OPEN_SQUARE;
+			break;
+
+		case ']':
+			token->type = TOKEN_CLOSE_SQUARE;
+			break;
+
+		case '*':
+			token->type = TOKEN_STAR;
+			break;
+
+		case '.':
+			token->type = TOKEN_DOT;
+			break;
+
+		case ',':
+			token->type = TOKEN_COMMA;
+			break;
+
+		default:
+			parseIdentifier (token->string, c);
+			token->lineNumber = getSourceLineNumber ();
+			token->filePosition = getInputFilePosition ();
+			token->keyword = lookupKeyword (vStringValue (token->string), Lang_go);
+			if (isKeyword (token, KEYWORD_NONE))
+				token->type = TOKEN_IDENTIFIER;
+			else
+				token->type = TOKEN_KEYWORD;
+			break;
+	}
+
+done:
+	lastTokenType = token->type;
+}
+
+// If "token" is an opening parenthesis, brace or square bracket, reads on
+// past the matching closer and leaves "token" holding the token after it.
+// Any other token is left untouched.
+static void skipToMatched (tokenInfo *const token)
+{
+	int nest_level = 0;
+	tokenType open_token;
+	tokenType close_token;
+
+	switch (token->type)
+	{
+		case TOKEN_OPEN_PAREN:
+			open_token = TOKEN_OPEN_PAREN;
+			close_token = TOKEN_CLOSE_PAREN;
+			break;
+		case TOKEN_OPEN_CURLY:
+			open_token = TOKEN_OPEN_CURLY;
+			close_token = TOKEN_CLOSE_CURLY;
+			break;
+		case TOKEN_OPEN_SQUARE:
+			open_token = TOKEN_OPEN_SQUARE;
+			close_token = TOKEN_CLOSE_SQUARE;
+			break;
+		default:
+			return;
+	}
+
+	/*
+	 * This routine will skip to a matching closing token.
+	 * It will also handle nested tokens like the (, ) below.
+	 * ( name varchar(30), text binary(10) )
+	 */
+	if (isType (token, open_token))
+	{
+		nest_level++;
+		while (!(isType (token, close_token) && (nest_level == 0)))
+		{
+			readToken (token);
+			if (isType (token, open_token))
+			{
+				nest_level++;
+			}
+			if (isType (token, close_token))
+			{
+				if (nest_level > 0)
+				{
+					nest_level--;
+				}
+			}
+		}
+		readToken (token);
+	}
+}
+
+// Advances "token" past one type expression, following the grammar rules
+// quoted below. Does nothing when "token" does not start a type.
+static void skipType (tokenInfo *const token)
+{
+again:
+	// Type = TypeName | TypeLit | "(" Type ")" .
+	if (isType (token, TOKEN_OPEN_PAREN))
+	{
+		skipToMatched (token);
+		return;
+	}
+
+	// TypeName = QualifiedIdent.
+	// QualifiedIdent = [ PackageName "." ] identifier .
+	// PackageName = identifier .
+	if (isType (token, TOKEN_IDENTIFIER))
+	{
+		readToken (token);
+		if (isType (token, TOKEN_DOT))
+		{
+			readToken (token);
+			Assert (isType (token, TOKEN_IDENTIFIER));
+			readToken (token);
+		}
+		return;
+	}
+
+	// StructType = "struct" "{" { FieldDecl ";" } "}"
+	// InterfaceType = "interface" "{" { MethodSpec ";" } "}" .
+	if (isKeyword (token, KEYWORD_struct) || isKeyword (token, KEYWORD_interface))
+	{
+		readToken (token);
+		Assert (isType (token, TOKEN_OPEN_CURLY));
+		skipToMatched (token);
+		return;
+	}
+
+	// ArrayType = "[" ArrayLength "]" ElementType .
+	// SliceType = "[" "]" ElementType .
+	// ElementType = Type .
+	if (isType (token, TOKEN_OPEN_SQUARE))
+	{
+		skipToMatched (token);
+		goto again;
+	}
+
+	// PointerType = "*" BaseType .
+	// BaseType = Type .
+	// ChannelType = ( "chan" [ "<-" ] | "<-" "chan" ) ElementType .
+	if (isType (token, TOKEN_STAR) || isKeyword (token, KEYWORD_chan) || isType (token, TOKEN_LEFT_ARROW))
+	{
+		readToken (token);
+		goto again;
+	}
+
+	// MapType = "map" "[" KeyType "]" ElementType .
+	// KeyType = Type .
+	if (isKeyword (token, KEYWORD_map))
+	{
+		readToken (token);
+		Assert (isType (token, TOKEN_OPEN_SQUARE));
+		skipToMatched (token);
+		goto again;
+	}
+
+	// FunctionType = "func" Signature .
+	// Signature = Parameters [ Result ] .
+	// Result = Parameters | Type .
+	// Parameters = "(" [ ParameterList [ "," ] ] ")" .
+	if (isKeyword (token, KEYWORD_func))
+	{
+		readToken (token);
+		Assert (isType (token, TOKEN_OPEN_PAREN));
+		// Parameters
+		skipToMatched (token);
+		// Result is parameters or type or nothing. skipType treats anything
+		// surrounded by parentheses as a type, and does nothing if what
+		// follows is not a type.
+		goto again;
+	}
+}
+
+// Skip to the next semicolon, skipping over matching brackets.
+static void skipToTopLevelSemicolon (tokenInfo *const token)
+{
+	// Bracketed groups are stepped over whole, so a semicolon inside one
+	// does not end the scan.
+	while (!isType (token, TOKEN_SEMICOLON))
+	{
+		readToken (token);
+		skipToMatched (token);
+	}
+}
+
+// Emits a tag of the given kind named after "token", unless that kind is
+// disabled. When a package scope is known and qualified tags are requested,
+// a second tag named "<scope>.<name>" is emitted as well.
+static void makeTag (tokenInfo *const token, const goKind kind)
+{
+	const char *const name = vStringValue (token->string);
+
+	tagEntryInfo e;
+	initTagEntry (&e, name);
+
+	if (!GoKinds [kind].enabled)
+		return;
+
+	e.lineNumber = token->lineNumber;
+	e.filePosition = token->filePosition;
+	e.kindName = GoKinds [kind].name;
+	e.kind = GoKinds [kind].letter;
+
+	makeTagEntry (&e);
+
+	if (scope && Option.include.qualifiedTags)
+	{
+		vString *qualifiedName = vStringNew ();
+		vStringCopy (qualifiedName, scope);
+		vStringCatS (qualifiedName, ".");
+		vStringCat (qualifiedName, token->string);
+		e.name = vStringValue (qualifiedName);
+		makeTagEntry (&e);
+		vStringDelete (qualifiedName);
+	}
+}
+
+// Handles a "package" clause: tags the package name and, the first time
+// round when qualified tags are requested, keeps it as the scope.
+static void parsePackage (tokenInfo *const token)
+{
+	tokenInfo *const name = newToken ();
+
+	readToken (name);
+	Assert (isType (name, TOKEN_IDENTIFIER));
+	makeTag (name, GOTAG_PACKAGE);
+	if (!scope && Option.include.qualifiedTags)
+	{
+		scope = vStringNew ();
+		vStringCopy (scope, name->string);
+	}
+
+	deleteToken (name);
+}
+
+// Handles a "func" declaration: steps over an optional receiver, takes the
+// name, steps over parameters, result and body, then tags the name.
+static void parseFunctionOrMethod (tokenInfo *const token)
+{
+	// FunctionDecl = "func" identifier Signature [ Body ] .
+	// Body = Block.
+	//
+	// MethodDecl = "func" Receiver MethodName Signature [ Body ] .
+	// Receiver = "(" [ identifier ] [ "*" ] BaseTypeName ")" .
+	// BaseTypeName = identifier .
+	tokenInfo *const name = newToken ();
+
+	// Skip over receiver.
+	readToken (name);
+	if (isType (name, TOKEN_OPEN_PAREN))
+		skipToMatched (name);
+
+	Assert (isType (name, TOKEN_IDENTIFIER));
+
+	// Skip over parameters.
+	readToken (token);
+	skipToMatched (token);
+
+	// Skip over result.
+	skipType (token);
+
+	// Skip over function body.
+	if (isType (token, TOKEN_OPEN_CURLY))
+		skipToMatched (token);
+
+	makeTag (name, GOTAG_FUNCTION);
+
+	deleteToken (name);
+}
+
+// Handles a "const", "type" or "var" declaration, single or parenthesised,
+// tagging each declared name with "kind".
+static void parseConstTypeVar (tokenInfo *const token, goKind kind)
+{
+	// ConstDecl = "const" ( ConstSpec | "(" { ConstSpec ";" } ")" ) .
+	// ConstSpec = IdentifierList [ [ Type ] "=" ExpressionList ] .
+	// IdentifierList = identifier { "," identifier } .
+	// ExpressionList = Expression { "," Expression } .
+	// TypeDecl = "type" ( TypeSpec | "(" { TypeSpec ";" } ")" ) .
+	// TypeSpec = identifier Type .
+	// VarDecl = "var" ( VarSpec | "(" { VarSpec ";" } ")" ) .
+	// VarSpec = IdentifierList ( Type [ "=" ExpressionList ] | "=" ExpressionList ) .
+	tokenInfo *const name = newToken ();
+	boolean usesParens = FALSE;
+
+	readToken (name);
+
+	if (isType (name, TOKEN_OPEN_PAREN))
+	{
+		usesParens = TRUE;
+		readToken (name);
+	}
+
+again:
+	while (1)
+	{
+		makeTag (name, kind);
+		readToken (token);
+		if (!isType (token, TOKEN_COMMA) && !isType (token, TOKEN_CLOSE_PAREN))
+			break;
+		readToken (name);
+	}
+
+	skipType (token);
+	skipToTopLevelSemicolon (token);
+
+	if (usesParens)
+	{
+		// more specs follow until the closing parenthesis
+		readToken (name);
+		if (!isType (name, TOKEN_CLOSE_PAREN))
+			goto again;
+	}
+
+	deleteToken (name);
+}
+
+// Top-level loop: dispatches on the declaration keywords. It never returns
+// normally; readToken () leaves it with longjmp () at end of input.
+static void parseGoFile (tokenInfo *const token)
+{
+	do
+	{
+		readToken (token);
+
+		if (isType (token, TOKEN_KEYWORD))
+		{
+			switch (token->keyword)
+			{
+				case KEYWORD_package:
+					parsePackage (token);
+					break;
+				case KEYWORD_func:
+					parseFunctionOrMethod (token);
+					break;
+				case KEYWORD_const:
+					parseConstTypeVar (token, GOTAG_CONST);
+					break;
+				case KEYWORD_type:
+					parseConstTypeVar (token, GOTAG_TYPE);
+					break;
+				case KEYWORD_var:
+					parseConstTypeVar (token, GOTAG_VAR);
+					break;
+				default:
+					break;
+			}
+		}
+	} while (TRUE);
+}
+
+// Parser entry point: tags the current input file, then drops the package
+// scope so that it does not carry over to the next file.
+static void findGoTags (void)
+{
+	tokenInfo *const token = newToken ();
+
+	// setjmp () is used directly as the tested expression; storing its
+	// result in a variable is not a context C guarantees to work. Control
+	// comes back here with ExceptionEOF once readToken () hits end of input.
+	if (setjmp (Exception) == ExceptionNone)
+		parseGoFile (token);
+
+	deleteToken (token);
+	vStringDelete (scope);
+	scope = NULL;
+}
+
+// Builds the parser definition for Go (files ending in ".go").
+extern parserDefinition *GoParser (void)
+{
+	static const char *const extensions[] = { "go", NULL };
+	parserDefinition *def = parserNew ("Go");
+	def->kinds = GoKinds;
+	def->kindCount = KIND_COUNT (GoKinds);
+	def->extensions = extensions;
+	def->parser = findGoTags;
+	def->initialize = initialize;
+	return def;
+}
diff --git a/third_party/ctags/html.c b/third_party/ctags/html.c
new file mode 100644
index 000000000..cc6733507
--- /dev/null
+++ b/third_party/ctags/html.c
@@ -0,0 +1,50 @@
+// clang-format off
+/*
+* $Id: html.c 443 2006-05-30 04:37:13Z darren $
+*
+* Copyright (c) 2003, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* This module contains functions for generating tags for HTML language
+* files.
+*/
+
+/*
+* INCLUDE FILES
+*/
+#include "third_party/ctags/general.h" /* must always come first */
+#include "third_party/ctags/parse.h"
+
+/*
+* FUNCTION DEFINITIONS
+*/
+
+/* Registers the two tag patterns for HTML: named anchors (<a name="...">,
+ * with any other attributes before or after the name) and JavaScript
+ * function definitions. In the anchor pattern group 1 belongs to
+ * POSSIBLE_ATTRIBUTES, so the anchor name is group 2.
+ */
+static void installHtmlRegex (const langType language)
+{
+#define POSSIBLE_ATTRIBUTES "([ \t]+[a-z]+=\"?[^>\"]*\"?)*"
+	addTagRegex (language,
+		"<a"
+		POSSIBLE_ATTRIBUTES
+		"[ \t]+name=\"?([^>\"]+)\"?"
+		POSSIBLE_ATTRIBUTES
+		"[ \t]*>",
+		"\\2", "a,anchor,named anchors", "i");
+
+	addTagRegex (language, "^[ \t]*function[ \t]*([A-Za-z0-9_]+)[ \t]*\\(",
+		"\\1", "f,function,JavaScript functions", NULL);
+}
+
+/* Create parser definition structure */
+extern parserDefinition* HtmlParser (void)
+{
+	static const char *const extensions [] = { "htm", "html", NULL };
+	parserDefinition *const def = parserNew ("HTML");
+	def->extensions = extensions;
+	def->initialize = installHtmlRegex;
+	def->regex = TRUE;
+	return def;
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/third_party/ctags/jscript.c b/third_party/ctags/jscript.c
new file mode 100644
index 000000000..b82cfa81c
--- /dev/null
+++ b/third_party/ctags/jscript.c
@@ -0,0 +1,1691 @@
+// clang-format off
+/*
+ * $Id: jscript.c 763 2010-07-28 14:22:42Z dfishburn $
+ *
+ * Copyright (c) 2003, Darren Hiebert
+ *
+ * This source code is released for free distribution under the terms of the
+ * GNU General Public License.
+ *
+ * This module contains functions for generating tags for JavaScript language
+ * files.
+ * + * This is a good reference for different forms of the function statement: + * http://www.permadi.com/tutorial/jsFunc/ + * Another good reference: + * http://developer.mozilla.org/en/docs/Core_JavaScript_1.5_Guide + */ + +/* + * INCLUDE FILES + */ +#include "third_party/ctags/general.h" /* must always come first */ +#include "libc/str/str.h" /* to define isalpha () */ +#include "libc/runtime/runtime.h" +#ifdef DEBUG +#include "libc/calls/calls.h" +#include "libc/calls/dprintf.h" +#include "libc/calls/weirdtypes.h" +#include "libc/fmt/fmt.h" +#include "libc/mem/fmt.h" +#include "libc/stdio/stdio.h" +#include "libc/stdio/temp.h" +#include "third_party/musl/tempnam.h" +#endif + +#include "third_party/ctags/debug.h" +#include "third_party/ctags/entry.h" +#include "third_party/ctags/keyword.h" +#include "third_party/ctags/parse.h" +#include "third_party/ctags/read.h" +#include "third_party/ctags/routines.h" +#include "third_party/ctags/vstring.h" + +/* + * MACROS + */ +#define isType(token,t) (boolean) ((token)->type == (t)) +#define isKeyword(token,k) (boolean) ((token)->keyword == (k)) + +/* + * DATA DECLARATIONS + */ + +typedef enum eException { ExceptionNone, ExceptionEOF } exception_t; + +/* + * Tracks class and function names already created + */ +static stringList *ClassNames; +static stringList *FunctionNames; + +/* Used to specify type of keyword. +*/ +typedef enum eKeywordId { + KEYWORD_NONE = -1, + KEYWORD_function, + KEYWORD_capital_function, + KEYWORD_object, + KEYWORD_capital_object, + KEYWORD_prototype, + KEYWORD_var, + KEYWORD_new, + KEYWORD_this, + KEYWORD_for, + KEYWORD_while, + KEYWORD_do, + KEYWORD_if, + KEYWORD_else, + KEYWORD_switch, + KEYWORD_try, + KEYWORD_catch, + KEYWORD_finally +} keywordId; + +/* Used to determine whether keyword is valid for the token language and + * what its ID is. 
+ */ +typedef struct sKeywordDesc { + const char *name; + keywordId id; +} keywordDesc; + +typedef enum eTokenType { + TOKEN_UNDEFINED, + TOKEN_CHARACTER, + TOKEN_CLOSE_PAREN, + TOKEN_SEMICOLON, + TOKEN_COLON, + TOKEN_COMMA, + TOKEN_KEYWORD, + TOKEN_OPEN_PAREN, + TOKEN_OPERATOR, + TOKEN_IDENTIFIER, + TOKEN_STRING, + TOKEN_PERIOD, + TOKEN_OPEN_CURLY, + TOKEN_CLOSE_CURLY, + TOKEN_EQUAL_SIGN, + TOKEN_FORWARD_SLASH, + TOKEN_OPEN_SQUARE, + TOKEN_CLOSE_SQUARE +} tokenType; + +typedef struct sTokenInfo { + tokenType type; + keywordId keyword; + vString * string; + vString * scope; + unsigned long lineNumber; + fpos_t filePosition; + int nestLevel; + boolean ignoreTag; +} tokenInfo; + +/* + * DATA DEFINITIONS + */ + +static langType Lang_js; + +static jmp_buf Exception; + +typedef enum { + JSTAG_FUNCTION, + JSTAG_CLASS, + JSTAG_METHOD, + JSTAG_PROPERTY, + JSTAG_VARIABLE, + JSTAG_COUNT +} jsKind; + +static kindOption JsKinds [] = { + { TRUE, 'f', "function", "functions" }, + { TRUE, 'c', "class", "classes" }, + { TRUE, 'm', "method", "methods" }, + { TRUE, 'p', "property", "properties" }, + { TRUE, 'v', "variable", "global variables" } +}; + +static const keywordDesc JsKeywordTable [] = { + /* keyword keyword ID */ + { "function", KEYWORD_function }, + { "Function", KEYWORD_capital_function }, + { "object", KEYWORD_object }, + { "Object", KEYWORD_capital_object }, + { "prototype", KEYWORD_prototype }, + { "var", KEYWORD_var }, + { "new", KEYWORD_new }, + { "this", KEYWORD_this }, + { "for", KEYWORD_for }, + { "while", KEYWORD_while }, + { "do", KEYWORD_do }, + { "if", KEYWORD_if }, + { "else", KEYWORD_else }, + { "switch", KEYWORD_switch }, + { "try", KEYWORD_try }, + { "catch", KEYWORD_catch }, + { "finally", KEYWORD_finally } +}; + +/* + * FUNCTION DEFINITIONS + */ + +/* Recursive functions */ +static void parseFunction (tokenInfo *const token); +static boolean parseBlock (tokenInfo *const token, tokenInfo *const parent); +static boolean parseLine (tokenInfo *const token, 
boolean is_inside_class); + +static boolean isIdentChar (const int c) +{ + return (boolean) + (isalpha (c) || isdigit (c) || c == '$' || + c == '@' || c == '_' || c == '#'); +} + +static void buildJsKeywordHash (void) +{ + const size_t count = sizeof (JsKeywordTable) / + sizeof (JsKeywordTable [0]); + size_t i; + for (i = 0 ; i < count ; ++i) + { + const keywordDesc* const p = &JsKeywordTable [i]; + addKeyword (p->name, Lang_js, (int) p->id); + } +} + +static tokenInfo *newToken (void) +{ + tokenInfo *const token = xMalloc (1, tokenInfo); + + token->type = TOKEN_UNDEFINED; + token->keyword = KEYWORD_NONE; + token->string = vStringNew (); + token->scope = vStringNew (); + token->nestLevel = 0; + token->ignoreTag = FALSE; + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + + return token; +} + +static void deleteToken (tokenInfo *const token) +{ + vStringDelete (token->string); + vStringDelete (token->scope); + eFree (token); +} + +/* + * Tag generation functions + */ + +/* +static void makeConstTag (tokenInfo *const token, const jsKind kind) +{ + if (JsKinds [kind].enabled && ! token->ignoreTag ) + { + const char *const name = vStringValue (token->string); + tagEntryInfo e; + initTagEntry (&e, name); + + e.lineNumber = token->lineNumber; + e.filePosition = token->filePosition; + e.kindName = JsKinds [kind].name; + e.kind = JsKinds [kind].letter; + + makeTagEntry (&e); + } +} + +static void makeJsTag (tokenInfo *const token, const jsKind kind) +{ + vString * fulltag; + + if (JsKinds [kind].enabled && ! token->ignoreTag ) + { + * + * If a scope has been added to the token, change the token + * string to include the scope when making the tag. 
+ * + if ( vStringLength(token->scope) > 0 ) + { + * + fulltag = vStringNew (); + vStringCopy(fulltag, token->scope); + vStringCatS (fulltag, "."); + vStringCatS (fulltag, vStringValue(token->string)); + vStringTerminate(fulltag); + vStringCopy(token->string, fulltag); + vStringDelete (fulltag); + * + jsKind parent_kind = JSTAG_CLASS; + + * + * if we're creating a function (and not a method), + * guess we're inside another function + * + if (kind == JSTAG_FUNCTION) + parent_kind = JSTAG_FUNCTION; + + e.extensionFields.scope[0] = JsKinds [parent_kind].name; + e.extensionFields.scope[1] = vStringValue (token->scope); + } + * makeConstTag (token, kind); * + makeTagEntry (&e); + } +} +*/ + +static void makeJsTag (tokenInfo *const token, const jsKind kind) +{ + if (JsKinds [kind].enabled && ! token->ignoreTag ) + { + const char *const name = vStringValue (token->string); + tagEntryInfo e; + initTagEntry (&e, name); + + e.lineNumber = token->lineNumber; + e.filePosition = token->filePosition; + e.kindName = JsKinds [kind].name; + e.kind = JsKinds [kind].letter; + + if ( vStringLength(token->scope) > 0 ) + { + jsKind parent_kind = JSTAG_CLASS; + + /* + * If we're creating a function (and not a method), + * guess we're inside another function + */ + if (kind == JSTAG_FUNCTION) + parent_kind = JSTAG_FUNCTION; + + e.extensionFields.scope[0] = JsKinds [parent_kind].name; + e.extensionFields.scope[1] = vStringValue (token->scope); + } + + makeTagEntry (&e); + } +} + +static void makeClassTag (tokenInfo *const token) +{ + vString * fulltag; + + if ( ! token->ignoreTag ) + { + fulltag = vStringNew (); + if (vStringLength (token->scope) > 0) + { + vStringCopy(fulltag, token->scope); + vStringCatS (fulltag, "."); + vStringCatS (fulltag, vStringValue(token->string)); + } + else + { + vStringCopy(fulltag, token->string); + } + vStringTerminate(fulltag); + if ( ! 
stringListHas(ClassNames, vStringValue (fulltag)) ) + { + stringListAdd (ClassNames, vStringNewCopy (fulltag)); + makeJsTag (token, JSTAG_CLASS); + } + vStringDelete (fulltag); + } +} + +static void makeFunctionTag (tokenInfo *const token) +{ + vString * fulltag; + + if ( ! token->ignoreTag ) + { + fulltag = vStringNew (); + if (vStringLength (token->scope) > 0) + { + vStringCopy(fulltag, token->scope); + vStringCatS (fulltag, "."); + vStringCatS (fulltag, vStringValue(token->string)); + } + else + { + vStringCopy(fulltag, token->string); + } + vStringTerminate(fulltag); + if ( ! stringListHas(FunctionNames, vStringValue (fulltag)) ) + { + stringListAdd (FunctionNames, vStringNewCopy (fulltag)); + makeJsTag (token, JSTAG_FUNCTION); + } + vStringDelete (fulltag); + } +} + +/* + * Parsing functions + */ + +static void parseString (vString *const string, const int delimiter) +{ + boolean end = FALSE; + while (! end) + { + int c = fileGetc (); + if (c == EOF) + end = TRUE; + else if (c == '\\') + { + c = fileGetc(); /* This maybe a ' or ". */ + vStringPut(string, c); + } + else if (c == delimiter) + end = TRUE; + else + vStringPut (string, c); + } + vStringTerminate (string); +} + +/* Read a C identifier beginning with "firstChar" and places it into + * "name". 
+ */ +static void parseIdentifier (vString *const string, const int firstChar) +{ + int c = firstChar; + Assert (isIdentChar (c)); + do + { + vStringPut (string, c); + c = fileGetc (); + } while (isIdentChar (c)); + vStringTerminate (string); + if (!isspace (c)) + fileUngetc (c); /* unget non-identifier character */ +} + +static void readToken (tokenInfo *const token) +{ + int c; + + token->type = TOKEN_UNDEFINED; + token->keyword = KEYWORD_NONE; + vStringClear (token->string); + +getNextChar: + do + { + c = fileGetc (); + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + } + while (c == '\t' || c == ' ' || c == '\n'); + + switch (c) + { + case EOF: longjmp (Exception, (int)ExceptionEOF); break; + case '(': token->type = TOKEN_OPEN_PAREN; break; + case ')': token->type = TOKEN_CLOSE_PAREN; break; + case ';': token->type = TOKEN_SEMICOLON; break; + case ',': token->type = TOKEN_COMMA; break; + case '.': token->type = TOKEN_PERIOD; break; + case ':': token->type = TOKEN_COLON; break; + case '{': token->type = TOKEN_OPEN_CURLY; break; + case '}': token->type = TOKEN_CLOSE_CURLY; break; + case '=': token->type = TOKEN_EQUAL_SIGN; break; + case '[': token->type = TOKEN_OPEN_SQUARE; break; + case ']': token->type = TOKEN_CLOSE_SQUARE; break; + + case '\'': + case '"': + token->type = TOKEN_STRING; + parseString (token->string, c); + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + break; + + case '\\': + c = fileGetc (); + if (c != '\\' && c != '"' && !isspace (c)) + fileUngetc (c); + token->type = TOKEN_CHARACTER; + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + break; + + case '/': + { + int d = fileGetc (); + if ( (d != '*') && /* is this the start of a comment? */ + (d != '/') ) /* is a one line comment? 
*/ + { + token->type = TOKEN_FORWARD_SLASH; + fileUngetc (d); + } + else + { + if (d == '*') + { + do + { + fileSkipToCharacter ('*'); + c = fileGetc (); + if (c == '/') + break; + else + fileUngetc (c); + } while (c != EOF && c != '\0'); + goto getNextChar; + } + else if (d == '/') /* is this the start of a comment? */ + { + fileSkipToCharacter ('\n'); + goto getNextChar; + } + } + break; + } + + default: + if (! isIdentChar (c)) + token->type = TOKEN_UNDEFINED; + else + { + parseIdentifier (token->string, c); + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + token->keyword = analyzeToken (token->string, Lang_js); + if (isKeyword (token, KEYWORD_NONE)) + token->type = TOKEN_IDENTIFIER; + else + token->type = TOKEN_KEYWORD; + } + break; + } +} + +static void copyToken (tokenInfo *const dest, tokenInfo *const src) +{ + dest->nestLevel = src->nestLevel; + dest->lineNumber = src->lineNumber; + dest->filePosition = src->filePosition; + dest->type = src->type; + dest->keyword = src->keyword; + vStringCopy(dest->string, src->string); + vStringCopy(dest->scope, src->scope); +} + +/* + * Token parsing functions + */ + +static void skipArgumentList (tokenInfo *const token) +{ + int nest_level = 0; + + /* + * Other databases can have arguments with fully declared + * datatypes: + * ( name varchar(30), text binary(10) ) + * So we must check for nested open and closing parantheses + */ + + if (isType (token, TOKEN_OPEN_PAREN)) /* arguments? */ + { + nest_level++; + while (! 
(isType (token, TOKEN_CLOSE_PAREN) && (nest_level == 0))) + { + readToken (token); + if (isType (token, TOKEN_OPEN_PAREN)) + { + nest_level++; + } + if (isType (token, TOKEN_CLOSE_PAREN)) + { + if (nest_level > 0) + { + nest_level--; + } + } + } + readToken (token); + } +} + +static void skipArrayList (tokenInfo *const token) +{ + int nest_level = 0; + + /* + * Handle square brackets + * var name[1] + * So we must check for nested open and closing square brackets + */ + + if (isType (token, TOKEN_OPEN_SQUARE)) /* arguments? */ + { + nest_level++; + while (! (isType (token, TOKEN_CLOSE_SQUARE) && (nest_level == 0))) + { + readToken (token); + if (isType (token, TOKEN_OPEN_SQUARE)) + { + nest_level++; + } + if (isType (token, TOKEN_CLOSE_SQUARE)) + { + if (nest_level > 0) + { + nest_level--; + } + } + } + readToken (token); + } +} + +static void addContext (tokenInfo* const parent, const tokenInfo* const child) +{ + if (vStringLength (parent->string) > 0) + { + vStringCatS (parent->string, "."); + } + vStringCatS (parent->string, vStringValue(child->string)); + vStringTerminate(parent->string); +} + +static void addToScope (tokenInfo* const token, vString* const extra) +{ + if (vStringLength (token->scope) > 0) + { + vStringCatS (token->scope, "."); + } + vStringCatS (token->scope, vStringValue(extra)); + vStringTerminate(token->scope); +} + +/* + * Scanning functions + */ + +static void findCmdTerm (tokenInfo *const token) +{ + /* + * Read until we find either a semicolon or closing brace. + * Any nested braces will be handled within. + */ + while (! 
( isType (token, TOKEN_SEMICOLON) || + isType (token, TOKEN_CLOSE_CURLY) ) ) + { + /* Handle nested blocks */ + if ( isType (token, TOKEN_OPEN_CURLY)) + { + parseBlock (token, token); + } + else if ( isType (token, TOKEN_OPEN_PAREN) ) + { + skipArgumentList(token); + } + else + { + readToken (token); + } + } +} + +static void parseSwitch (tokenInfo *const token) +{ + /* + * switch (expression){ + * case value1: + * statement; + * break; + * case value2: + * statement; + * break; + * default : statement; + * } + */ + + readToken (token); + + if (isType (token, TOKEN_OPEN_PAREN)) + { + /* + * Handle nameless functions, these will only + * be considered methods. + */ + skipArgumentList(token); + } + + if (isType (token, TOKEN_OPEN_CURLY)) + { + /* + * This will be either a function or a class. + * We can only determine this by checking the body + * of the function. If we find a "this." we know + * it is a class, otherwise it is a function. + */ + parseBlock (token, token); + } + +} + +static void parseLoop (tokenInfo *const token) +{ + /* + * Handles these statements + * for (x=0; x<3; x++) + * document.write("This text is repeated three times<br>"); + * + * for (x=0; x<3; x++) + * { + * document.write("This text is repeated three times<br>"); + * } + * + * while (number<5){ + * document.write(number+"<br>"); + * number++; + * } + * + * do{ + * document.write(number+"<br>"); + * number++; + * } + * while (number<5); + */ + + if (isKeyword (token, KEYWORD_for) || isKeyword (token, KEYWORD_while)) + { + readToken(token); + + if (isType (token, TOKEN_OPEN_PAREN)) + { + /* + * Handle nameless functions, these will only + * be considered methods. + */ + skipArgumentList(token); + } + + if (isType (token, TOKEN_OPEN_CURLY)) + { + /* + * This will be either a function or a class. + * We can only determine this by checking the body + * of the function. If we find a "this." we know + * it is a class, otherwise it is a function. + */ + parseBlock (token, token); + } + else + { + parseLine(token, FALSE); + } + } + else if (isKeyword (token, KEYWORD_do)) + { + readToken(token); + + if (isType (token, TOKEN_OPEN_CURLY)) + { + /* + * This will be either a function or a class. + * We can only determine this by checking the body + * of the function. If we find a "this." we know + * it is a class, otherwise it is a function. + */ + parseBlock (token, token); + } + else + { + parseLine(token, FALSE); + } + + readToken(token); + + if (isKeyword (token, KEYWORD_while)) + { + readToken(token); + + if (isType (token, TOKEN_OPEN_PAREN)) + { + /* + * Handle nameless functions, these will only + * be considered methods. + */ + skipArgumentList(token); + } + } + } +} + +static boolean parseIf (tokenInfo *const token) +{ + boolean read_next_token = TRUE; + /* + * If statements have two forms + * if ( ... ) + * one line; + * + * if ( ... ) + * statement; + * else + * statement + * + * if ( ... ) { + * multiple; + * statements; + * } + * + * + * if ( ... ) { + * return elem + * } + * + * This example if correctly written, but the + * else contains only 1 statement without a terminator + * since the function finishes with the closing brace. + * + * function a(flag){ + * if(flag) + * test(1); + * else + * test(2) + * } + * + * TODO: Deal with statements that can optional end + * without a semi-colon. Currently this messes up + * the parsing of blocks.
+ * Need to somehow detect this has happened, and either + * backup a token, or skip reading the next token if + * that is possible from all code locations. + * + */ + + readToken (token); + + if (isKeyword (token, KEYWORD_if)) + { + /* + * Check for an "else if" and consume the "if" + */ + readToken (token); + } + + if (isType (token, TOKEN_OPEN_PAREN)) + { + /* + * Handle nameless functions, these will only + * be considered methods. + */ + skipArgumentList(token); + } + + if (isType (token, TOKEN_OPEN_CURLY)) + { + /* + * This will be either a function or a class. + * We can only determine this by checking the body + * of the function. If we find a "this." we know + * it is a class, otherwise it is a function. + */ + parseBlock (token, token); + } + else + { + findCmdTerm (token); + + /* + * The IF could be followed by an ELSE statement. + * This too could have two formats, a curly braced + * multiline section, or another single line. + */ + + if (isType (token, TOKEN_CLOSE_CURLY)) + { + /* + * This statement did not have a line terminator. + */ + read_next_token = FALSE; + } + else + { + readToken (token); + + if (isType (token, TOKEN_CLOSE_CURLY)) + { + /* + * This statement did not have a line terminator. 
+ */ + read_next_token = FALSE; + } + else + { + if (isKeyword (token, KEYWORD_else)) + read_next_token = parseIf (token); + } + } + } + return read_next_token; +} + +static void parseFunction (tokenInfo *const token) +{ + tokenInfo *const name = newToken (); + boolean is_class = FALSE; + + /* + * This deals with these formats + * function validFunctionTwo(a,b) {} + */ + + readToken (name); + /* Add scope in case this is an INNER function */ + addToScope(name, token->scope); + + readToken (token); + if (isType (token, TOKEN_PERIOD)) + { + do + { + readToken (token); + if ( isKeyword(token, KEYWORD_NONE) ) + { + addContext (name, token); + readToken (token); + } + } while (isType (token, TOKEN_PERIOD)); + } + + if ( isType (token, TOKEN_OPEN_PAREN) ) + skipArgumentList(token); + + if ( isType (token, TOKEN_OPEN_CURLY) ) + { + is_class = parseBlock (token, name); + if ( is_class ) + makeClassTag (name); + else + makeFunctionTag (name); + } + + findCmdTerm (token); + + deleteToken (name); +} + +static boolean parseBlock (tokenInfo *const token, tokenInfo *const parent) +{ + boolean is_class = FALSE; + boolean read_next_token = TRUE; + vString * saveScope = vStringNew (); + + token->nestLevel++; + /* + * Make this routine a bit more forgiving. + * If called on an open_curly advance it + */ + if ( isType (token, TOKEN_OPEN_CURLY) && + isKeyword(token, KEYWORD_NONE) ) + readToken(token); + + if (! 
isType (token, TOKEN_CLOSE_CURLY)) + { + /* + * Read until we find the closing brace, + * any nested braces will be handled within + */ + do + { + read_next_token = TRUE; + if (isKeyword (token, KEYWORD_this)) + { + /* + * Means we are inside a class and have found + * a class, not a function + */ + is_class = TRUE; + vStringCopy(saveScope, token->scope); + addToScope (token, parent->string); + + /* + * Ignore the remainder of the line + * findCmdTerm(token); + */ + parseLine (token, is_class); + + vStringCopy(token->scope, saveScope); + } + else if (isKeyword (token, KEYWORD_var)) + { + /* + * Potentially we have found an inner function. + * Set something to indicate the scope + */ + vStringCopy(saveScope, token->scope); + addToScope (token, parent->string); + parseLine (token, is_class); + vStringCopy(token->scope, saveScope); + } + else if (isKeyword (token, KEYWORD_function)) + { + vStringCopy(saveScope, token->scope); + addToScope (token, parent->string); + parseFunction (token); + vStringCopy(token->scope, saveScope); + } + else if (isType (token, TOKEN_OPEN_CURLY)) + { + /* Handle nested blocks */ + parseBlock (token, parent); + } + else + { + /* + * It is possible for a line to have no terminator + * if the following line is a closing brace. + * parseLine will detect this case and indicate + * whether we should read an additional token. + */ + read_next_token = parseLine (token, is_class); + } + + /* + * Always read a new token unless we find a statement without + * a ending terminator + */ + if( read_next_token ) + readToken(token); + + /* + * If we find a statement without a terminator consider the + * block finished, otherwise the stack will be off by one. + */ + } while (! 
isType (token, TOKEN_CLOSE_CURLY) && read_next_token ); + } + + vStringDelete(saveScope); + token->nestLevel--; + + return is_class; +} + +static boolean parseMethods (tokenInfo *const token, tokenInfo *const class) +{ + tokenInfo *const name = newToken (); + boolean has_methods = FALSE; + + /* + * This deals with these formats + * validProperty : 2, + * validMethod : function(a,b) {} + * 'validMethod2' : function(a,b) {} + * container.dirtyTab = {'url': false, 'title':false, 'snapshot':false, '*': false} + */ + + do + { + readToken (token); + if (isType (token, TOKEN_CLOSE_CURLY)) + { + /* + * This was most likely a variable declaration of a hash table. + * indicate there were no methods and return. + */ + has_methods = FALSE; + goto cleanUp; + } + + if (isType (token, TOKEN_STRING) || isKeyword(token, KEYWORD_NONE)) + { + copyToken(name, token); + + readToken (token); + if ( isType (token, TOKEN_COLON) ) + { + readToken (token); + if ( isKeyword (token, KEYWORD_function) ) + { + readToken (token); + if ( isType (token, TOKEN_OPEN_PAREN) ) + { + skipArgumentList(token); + } + + if (isType (token, TOKEN_OPEN_CURLY)) + { + has_methods = TRUE; + addToScope (name, class->string); + makeJsTag (name, JSTAG_METHOD); + parseBlock (token, name); + + /* + * Read to the closing curly, check next + * token, if a comma, we must loop again + */ + readToken (token); + } + } + else + { + has_methods = TRUE; + addToScope (name, class->string); + makeJsTag (name, JSTAG_PROPERTY); + + /* + * Read the next token, if a comma + * we must loop again + */ + readToken (token); + } + } + } + } while ( isType(token, TOKEN_COMMA) ); + + findCmdTerm (token); + +cleanUp: + deleteToken (name); + + return has_methods; +} + +static boolean parseStatement (tokenInfo *const token, boolean is_inside_class) +{ + tokenInfo *const name = newToken (); + tokenInfo *const secondary_name = newToken (); + vString * saveScope = vStringNew (); + boolean is_class = FALSE; + boolean is_terminated = TRUE; + 
boolean is_global = FALSE; + boolean is_prototype = FALSE; + boolean has_methods = FALSE; + vString * fulltag; + + vStringClear(saveScope); + /* + * Functions can be named or unnamed. + * This deals with these formats: + * Function + * validFunctionOne = function(a,b) {} + * testlib.validFunctionFive = function(a,b) {} + * var innerThree = function(a,b) {} + * var innerFour = (a,b) {} + * var D2 = secondary_fcn_name(a,b) {} + * var D3 = new Function("a", "b", "return a+b;"); + * Class + * testlib.extras.ValidClassOne = function(a,b) { + * this.a = a; + * } + * Class Methods + * testlib.extras.ValidClassOne.prototype = { + * 'validMethodOne' : function(a,b) {}, + * 'validMethodTwo' : function(a,b) {} + * } + * ValidClassTwo = function () + * { + * this.validMethodThree = function() {} + * // unnamed method + * this.validMethodFour = () {} + * } + * Database.prototype.validMethodThree = Database_getTodaysDate; + */ + + if ( is_inside_class ) + is_class = TRUE; + /* + * var can preceed an inner function + */ + if ( isKeyword(token, KEYWORD_var) ) + { + /* + * Only create variables for global scope + */ + if ( token->nestLevel == 0 ) + { + is_global = TRUE; + } + readToken(token); + } + + if ( isKeyword(token, KEYWORD_this) ) + { + readToken(token); + if (isType (token, TOKEN_PERIOD)) + { + readToken(token); + } + } + + copyToken(name, token); + + while (! isType (token, TOKEN_CLOSE_CURLY) && + ! isType (token, TOKEN_SEMICOLON) && + ! 
isType (token, TOKEN_EQUAL_SIGN) ) + { + /* Potentially the name of the function */ + readToken (token); + if (isType (token, TOKEN_PERIOD)) + { + /* + * Cannot be a global variable is it has dot references in the name + */ + is_global = FALSE; + do + { + readToken (token); + if ( isKeyword(token, KEYWORD_NONE) ) + { + if ( is_class ) + { + vStringCopy(saveScope, token->scope); + addToScope(token, name->string); + } + else + addContext (name, token); + } + else if ( isKeyword(token, KEYWORD_prototype) ) + { + /* + * When we reach the "prototype" tag, we infer: + * "BindAgent" is a class + * "build" is a method + * + * function BindAgent( repeatableIdName, newParentIdName ) { + * } + * + * CASE 1 + * Specified function name: "build" + * BindAgent.prototype.build = function( mode ) { + * ignore everything within this function + * } + * + * CASE 2 + * Prototype listing + * ValidClassOne.prototype = { + * 'validMethodOne' : function(a,b) {}, + * 'validMethodTwo' : function(a,b) {} + * } + * + */ + makeClassTag (name); + is_class = TRUE; + is_prototype = TRUE; + + /* + * There should a ".function_name" next. + */ + readToken (token); + if (isType (token, TOKEN_PERIOD)) + { + /* + * Handle CASE 1 + */ + readToken (token); + if ( isKeyword(token, KEYWORD_NONE) ) + { + vStringCopy(saveScope, token->scope); + addToScope(token, name->string); + + makeJsTag (token, JSTAG_METHOD); + /* + * We can read until the end of the block / statement. + * We need to correctly parse any nested blocks, but + * we do NOT want to create any tags based on what is + * within the blocks. 
+ */ + token->ignoreTag = TRUE; + /* + * Find to the end of the statement + */ + findCmdTerm (token); + token->ignoreTag = FALSE; + is_terminated = TRUE; + goto cleanUp; + } + } + else if (isType (token, TOKEN_EQUAL_SIGN)) + { + readToken (token); + if (isType (token, TOKEN_OPEN_CURLY)) + { + /* + * Handle CASE 2 + * + * Creates tags for each of these class methods + * ValidClassOne.prototype = { + * 'validMethodOne' : function(a,b) {}, + * 'validMethodTwo' : function(a,b) {} + * } + */ + parseMethods(token, name); + /* + * Find to the end of the statement + */ + findCmdTerm (token); + token->ignoreTag = FALSE; + is_terminated = TRUE; + goto cleanUp; + } + } + } + readToken (token); + } while (isType (token, TOKEN_PERIOD)); + } + + if ( isType (token, TOKEN_OPEN_PAREN) ) + skipArgumentList(token); + + if ( isType (token, TOKEN_OPEN_SQUARE) ) + skipArrayList(token); + + /* + if ( isType (token, TOKEN_OPEN_CURLY) ) + { + is_class = parseBlock (token, name); + } + */ + } + + if ( isType (token, TOKEN_CLOSE_CURLY) ) + { + /* + * Reaching this section without having + * processed an open curly brace indicates + * the statement is most likely not terminated. + */ + is_terminated = FALSE; + goto cleanUp; + } + + if ( isType (token, TOKEN_SEMICOLON) ) + { + /* + * Only create variables for global scope + */ + if ( token->nestLevel == 0 && is_global ) + { + /* + * Handles this syntax: + * var g_var2; + */ + if (isType (token, TOKEN_SEMICOLON)) + makeJsTag (name, JSTAG_VARIABLE); + } + /* + * Statement has ended. + * This deals with calls to functions, like: + * alert(..); + */ + goto cleanUp; + } + + if ( isType (token, TOKEN_EQUAL_SIGN) ) + { + readToken (token); + + if ( isKeyword (token, KEYWORD_function) ) + { + readToken (token); + + if ( isKeyword (token, KEYWORD_NONE) && + ! 
isType (token, TOKEN_OPEN_PAREN) ) + { + /* + * Functions of this format: + * var D2A = function theAdd(a, b) + * { + * return a+b; + * } + * Are really two separate defined functions and + * can be referenced in two ways: + * alert( D2A(1,2) ); // produces 3 + * alert( theAdd(1,2) ); // also produces 3 + * So it must have two tags: + * D2A + * theAdd + * Save the reference to the name for later use, once + * we have established this is a valid function we will + * create the secondary reference to it. + */ + copyToken(secondary_name, token); + readToken (token); + } + + if ( isType (token, TOKEN_OPEN_PAREN) ) + skipArgumentList(token); + + if (isType (token, TOKEN_OPEN_CURLY)) + { + /* + * This will be either a function or a class. + * We can only determine this by checking the body + * of the function. If we find a "this." we know + * it is a class, otherwise it is a function. + */ + if ( is_inside_class ) + { + makeJsTag (name, JSTAG_METHOD); + if ( vStringLength(secondary_name->string) > 0 ) + makeFunctionTag (secondary_name); + parseBlock (token, name); + } + else + { + is_class = parseBlock (token, name); + if ( is_class ) + makeClassTag (name); + else + makeFunctionTag (name); + + if ( vStringLength(secondary_name->string) > 0 ) + makeFunctionTag (secondary_name); + + /* + * Find to the end of the statement + */ + goto cleanUp; + } + } + } + else if (isType (token, TOKEN_OPEN_PAREN)) + { + /* + * Handle nameless functions + * this.method_name = () {} + */ + skipArgumentList(token); + + if (isType (token, TOKEN_OPEN_CURLY)) + { + /* + * Nameless functions are only setup as methods. + */ + makeJsTag (name, JSTAG_METHOD); + parseBlock (token, name); + } + } + else if (isType (token, TOKEN_OPEN_CURLY)) + { + /* + * Creates tags for each of these class methods + * ValidClassOne.prototype = { + * 'validMethodOne' : function(a,b) {}, + * 'validMethodTwo' : function(a,b) {} + * } + * Or checks if this is a hash variable. 
+ * var z = {}; + */ + has_methods = parseMethods(token, name); + if ( ! has_methods ) + { + /* + * Only create variables for global scope + */ + if ( token->nestLevel == 0 && is_global ) + { + /* + * A pointer can be created to the function. + * If we recognize the function/class name ignore the variable. + * This format looks identical to a variable definition. + * A variable defined outside of a block is considered + * a global variable: + * var g_var1 = 1; + * var g_var2; + * This is not a global variable: + * var g_var = function; + * This is a global variable: + * var g_var = different_var_name; + */ + fulltag = vStringNew (); + if (vStringLength (token->scope) > 0) + { + vStringCopy(fulltag, token->scope); + vStringCatS (fulltag, "."); + vStringCatS (fulltag, vStringValue(token->string)); + } + else + { + vStringCopy(fulltag, token->string); + } + vStringTerminate(fulltag); + if ( ! stringListHas(FunctionNames, vStringValue (fulltag)) && + ! stringListHas(ClassNames, vStringValue (fulltag)) ) + { + readToken (token); + if ( ! isType (token, TOKEN_SEMICOLON)) + findCmdTerm (token); + if (isType (token, TOKEN_SEMICOLON)) + makeJsTag (name, JSTAG_VARIABLE); + } + vStringDelete (fulltag); + } + } + if (isType (token, TOKEN_CLOSE_CURLY)) + { + /* + * Assume the closing parantheses terminates + * this statements. 
+ */ + is_terminated = TRUE; + } + } + else if (isKeyword (token, KEYWORD_new)) + { + readToken (token); + if ( isKeyword (token, KEYWORD_function) || + isKeyword (token, KEYWORD_capital_function) || + isKeyword (token, KEYWORD_object) || + isKeyword (token, KEYWORD_capital_object) ) + { + if ( isKeyword (token, KEYWORD_object) || + isKeyword (token, KEYWORD_capital_object) ) + is_class = TRUE; + + readToken (token); + if ( isType (token, TOKEN_OPEN_PAREN) ) + skipArgumentList(token); + + if (isType (token, TOKEN_SEMICOLON)) + { + if ( token->nestLevel == 0 ) + { + if ( is_class ) + { + makeClassTag (name); + } else { + makeFunctionTag (name); + } + } + } + } + } + else if (isKeyword (token, KEYWORD_NONE)) + { + /* + * Only create variables for global scope + */ + if ( token->nestLevel == 0 && is_global ) + { + /* + * A pointer can be created to the function. + * If we recognize the function/class name ignore the variable. + * This format looks identical to a variable definition. + * A variable defined outside of a block is considered + * a global variable: + * var g_var1 = 1; + * var g_var2; + * This is not a global variable: + * var g_var = function; + * This is a global variable: + * var g_var = different_var_name; + */ + fulltag = vStringNew (); + if (vStringLength (token->scope) > 0) + { + vStringCopy(fulltag, token->scope); + vStringCatS (fulltag, "."); + vStringCatS (fulltag, vStringValue(token->string)); + } + else + { + vStringCopy(fulltag, token->string); + } + vStringTerminate(fulltag); + if ( ! stringListHas(FunctionNames, vStringValue (fulltag)) && + ! 
stringListHas(ClassNames, vStringValue (fulltag)) ) + { + findCmdTerm (token); + if (isType (token, TOKEN_SEMICOLON)) + makeJsTag (name, JSTAG_VARIABLE); + } + vStringDelete (fulltag); + } + } + } + findCmdTerm (token); + + /* + * Statements can be optionally terminated in the case of + * statement prior to a close curly brace as in the + * document.write line below: + * + * function checkForUpdate() { + * if( 1==1 ) { + * document.write("hello from checkForUpdate
") + * } + * return 1; + * } + */ + if ( ! is_terminated && isType (token, TOKEN_CLOSE_CURLY)) + is_terminated = FALSE; + + +cleanUp: + vStringCopy(token->scope, saveScope); + deleteToken (name); + deleteToken (secondary_name); + vStringDelete(saveScope); + + return is_terminated; +} + +static boolean parseLine (tokenInfo *const token, boolean is_inside_class) +{ + boolean is_terminated = TRUE; + /* + * Detect the common statements, if, while, for, do, ... + * This is necessary since the last statement within a block "{}" + * can be optionally terminated. + * + * If the statement is not terminated, we need to tell + * the calling routine to prevent reading an additional token + * looking for the end of the statement. + */ + + if (isType(token, TOKEN_KEYWORD)) + { + switch (token->keyword) + { + case KEYWORD_for: + case KEYWORD_while: + case KEYWORD_do: + parseLoop (token); + break; + case KEYWORD_if: + case KEYWORD_else: + case KEYWORD_try: + case KEYWORD_catch: + case KEYWORD_finally: + /* Common semantics */ + is_terminated = parseIf (token); + break; + case KEYWORD_switch: + parseSwitch (token); + break; + default: + parseStatement (token, is_inside_class); + break; + } + } + else + { + /* + * Special case where single line statements may not be + * SEMICOLON terminated. parseBlock needs to know this + * so that it does not read the next token. 
+ */ + is_terminated = parseStatement (token, is_inside_class); + } + return is_terminated; +} + +static void parseJsFile (tokenInfo *const token) +{ + do + { + readToken (token); + + if (isType(token, TOKEN_KEYWORD)) + { + switch (token->keyword) + { + case KEYWORD_function: parseFunction (token); break; + default: parseLine (token, FALSE); break; + } + } + else + { + parseLine (token, FALSE); + } + } while (TRUE); +} + +static void initialize (const langType language) +{ + Assert (sizeof (JsKinds) / sizeof (JsKinds [0]) == JSTAG_COUNT); + Lang_js = language; + buildJsKeywordHash (); +} + +static void findJsTags (void) +{ + tokenInfo *const token = newToken (); + exception_t exception; + + ClassNames = stringListNew (); + FunctionNames = stringListNew (); + + exception = (exception_t) (setjmp (Exception)); + while (exception == ExceptionNone) + parseJsFile (token); + + stringListDelete (ClassNames); + stringListDelete (FunctionNames); + ClassNames = NULL; + FunctionNames = NULL; + deleteToken (token); +} + +/* Create parser definition stucture */ +extern parserDefinition* JavaScriptParser (void) +{ + static const char *const extensions [] = { "js", NULL }; + parserDefinition *const def = parserNew ("JavaScript"); + def->extensions = extensions; + /* + * New definitions for parsing instead of regex + */ + def->kinds = JsKinds; + def->kindCount = KIND_COUNT (JsKinds); + def->parser = findJsTags; + def->initialize = initialize; + + return def; +} +/* vi:set tabstop=4 shiftwidth=4 noexpandtab: */ diff --git a/third_party/ctags/keyword.c b/third_party/ctags/keyword.c new file mode 100644 index 000000000..e54e5d465 --- /dev/null +++ b/third_party/ctags/keyword.c @@ -0,0 +1,260 @@ +// clang-format off +/* +* $Id: keyword.c 715 2009-07-06 03:31:00Z dhiebert $ +* +* Copyright (c) 1998-2002, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* Manages a keyword hash. 
+*/ + +/* +* INCLUDE FILES +*/ +#include "third_party/ctags/general.h" /* must always come first */ + +#include "libc/mem/alg.h" +#include "libc/str/str.h" + +#include "third_party/ctags/debug.h" +#include "third_party/ctags/keyword.h" +#include "third_party/ctags/options.h" +#include "third_party/ctags/routines.h" + +/* +* MACROS +*/ +#define HASH_EXPONENT 7 /* must be less than 17 */ + +/* +* DATA DECLARATIONS +*/ +typedef struct sHashEntry { + struct sHashEntry *next; + const char *string; + langType language; + int value; +} hashEntry; + +/* +* DATA DEFINITIONS +*/ +static const unsigned int TableSize = 1 << HASH_EXPONENT; +static hashEntry **HashTable = NULL; + +/* +* FUNCTION DEFINITIONS +*/ + +static hashEntry **getHashTable (void) +{ + static boolean allocated = FALSE; + + if (! allocated) + { + unsigned int i; + + HashTable = xMalloc (TableSize, hashEntry*); + + for (i = 0 ; i < TableSize ; ++i) + HashTable [i] = NULL; + + allocated = TRUE; + } + return HashTable; +} + +static hashEntry *getHashTableEntry (unsigned long hashedValue) +{ + hashEntry **const table = getHashTable (); + hashEntry *entry; + + Assert (hashedValue < TableSize); + entry = table [hashedValue]; + + return entry; +} + +static unsigned long hashValue (const char *const string) +{ + unsigned long value = 0; + const unsigned char *p; + + Assert (string != NULL); + + /* We combine the various words of the multiword key using the method + * described on page 512 of Vol. 3 of "The Art of Computer Programming". + */ + for (p = (const unsigned char *) string ; *p != '\0' ; ++p) + { + value <<= 1; + if (value & 0x00000100L) + value = (value & 0x000000ffL) + 1L; + value ^= *p; + } + /* Algorithm from page 509 of Vol. 3 of "The Art of Computer Programming" + * Treats "value" as a 16-bit integer plus 16-bit fraction. 
+ */ + value *= 40503L; /* = 2^16 * 0.6180339887 ("golden ratio") */ + value &= 0x0000ffffL; /* keep fractional part */ + value >>= 16 - HASH_EXPONENT; /* scale up by hash size and move down */ + + return value; +} + +static hashEntry *newEntry ( + const char *const string, langType language, int value) +{ + hashEntry *const entry = xMalloc (1, hashEntry); + + entry->next = NULL; + entry->string = string; + entry->language = language; + entry->value = value; + + return entry; +} + +/* Note that it is assumed that a "value" of zero means an undefined keyword + * and clients of this function should observe this. Also, all keywords added + * should be added in lower case. If we encounter a case-sensitive language + * whose keywords are in upper case, we will need to redesign this. + */ +extern void addKeyword (const char *const string, langType language, int value) +{ + const unsigned long hashedValue = hashValue (string); + hashEntry *entry = getHashTableEntry (hashedValue); + + if (entry == NULL) + { + hashEntry **const table = getHashTable (); + table [hashedValue] = newEntry (string, language, value); + } + else + { + hashEntry *prev = NULL; + + while (entry != NULL) + { + if (language == entry->language && + strcmp (string, entry->string) == 0) + { + Assert (("Already in table" == NULL)); + } + prev = entry; + entry = entry->next; + } + if (entry == NULL) + { + Assert (prev != NULL); + prev->next = newEntry (string, language, value); + } + } +} + +extern int lookupKeyword (const char *const string, langType language) +{ + const unsigned long hashedValue = hashValue (string); + hashEntry *entry = getHashTableEntry (hashedValue); + int result = -1; + + while (entry != NULL) + { + if (language == entry->language && strcmp (string, entry->string) == 0) + { + result = entry->value; + break; + } + entry = entry->next; + } + return result; +} + +extern void freeKeywordTable (void) +{ + if (HashTable != NULL) + { + unsigned int i; + + for (i = 0 ; i < TableSize ; ++i) + 
{ + hashEntry *entry = HashTable [i]; + + while (entry != NULL) + { + hashEntry *next = entry->next; + eFree (entry); + entry = next; + } + } + eFree (HashTable); + } +} + +extern int analyzeToken (vString *const name, langType language) +{ + vString *keyword = vStringNew (); + int result; + vStringCopyToLower (keyword, name); + result = lookupKeyword (vStringValue (keyword), language); + vStringDelete (keyword); + return result; +} + +#ifdef DEBUG + +static void printEntry (const hashEntry *const entry) +{ + printf (" %-15s %-7s\n", entry->string, getLanguageName (entry->language)); +} + +static unsigned int printBucket (const unsigned int i) +{ + hashEntry **const table = getHashTable (); + hashEntry *entry = table [i]; + unsigned int measure = 1; + boolean first = TRUE; + + printf ("%2d:", i); + if (entry == NULL) + printf ("\n"); + else while (entry != NULL) + { + if (! first) + printf (" "); + else + { + printf (" "); + first = FALSE; + } + printEntry (entry); + entry = entry->next; + measure = 2 * measure; + } + return measure - 1; +} + +extern void printKeywordTable (void) +{ + unsigned long emptyBucketCount = 0; + unsigned long measure = 0; + unsigned int i; + + for (i = 0 ; i < TableSize ; ++i) + { + const unsigned int pass = printBucket (i); + + measure += pass; + if (pass == 0) + ++emptyBucketCount; + } + + printf ("spread measure = %ld\n", measure); + printf ("%ld empty buckets\n", emptyBucketCount); +} + +#endif + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/third_party/ctags/keyword.h b/third_party/ctags/keyword.h new file mode 100644 index 000000000..d8d317d74 --- /dev/null +++ b/third_party/ctags/keyword.h @@ -0,0 +1,35 @@ +// clang-format off +/* +* $Id: keyword.h 658 2008-04-20 23:21:35Z elliotth $ +* +* Copyright (c) 1998-2002, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. 
+* +* External interface to keyword.c +*/ +#ifndef _KEYWORD_H +#define _KEYWORD_H + +/* +* INCLUDE FILES +*/ +#include "third_party/ctags/general.h" /* must always come first */ + +#include "third_party/ctags/parse.h" + +/* +* FUNCTION PROTOTYPES +*/ +extern void addKeyword (const char *const string, langType language, int value); +extern int lookupKeyword (const char *const string, langType language); +extern void freeKeywordTable (void); +#ifdef DEBUG +extern void printKeywordTable (void); +#endif +extern int analyzeToken (vString *const name, langType language); + +#endif /* _KEYWORD_H */ + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/third_party/ctags/lisp.c b/third_party/ctags/lisp.c new file mode 100644 index 000000000..ee30900c3 --- /dev/null +++ b/third_party/ctags/lisp.c @@ -0,0 +1,140 @@ +// clang-format off +/* +* $Id: lisp.c 717 2009-07-07 03:40:50Z dhiebert $ +* +* Copyright (c) 2000-2002, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains functions for generating tags for LISP files. 
+*/ + +/* +* INCLUDE FILES +*/ +#include "third_party/ctags/general.h" /* must always come first */ + +#include "third_party/ctags/parse.h" +#include "third_party/ctags/read.h" +#include "third_party/ctags/vstring.h" + +/* +* DATA DEFINITIONS +*/ +typedef enum { + K_FUNCTION +} lispKind; + +static kindOption LispKinds [] = { + { TRUE, 'f', "function", "functions" } +}; + +/* +* FUNCTION DEFINITIONS +*/ + +/* + * lisp tag functions + * look for (def or (DEF, quote or QUOTE + */ +static int L_isdef (const unsigned char *strp) +{ + return ( (strp [1] == 'd' || strp [1] == 'D') + && (strp [2] == 'e' || strp [2] == 'E') + && (strp [3] == 'f' || strp [3] == 'F')); +} + +static int L_isquote (const unsigned char *strp) +{ + return ( (*(++strp) == 'q' || *strp == 'Q') + && (*(++strp) == 'u' || *strp == 'U') + && (*(++strp) == 'o' || *strp == 'O') + && (*(++strp) == 't' || *strp == 'T') + && (*(++strp) == 'e' || *strp == 'E') + && isspace (*(++strp))); +} + +static void L_getit (vString *const name, const unsigned char *dbp) +{ + const unsigned char *p; + + if (*dbp == '\'') /* Skip prefix quote */ + dbp++; + else if (*dbp == '(' && L_isquote (dbp)) /* Skip "(quote " */ + { + dbp += 7; + while (isspace (*dbp)) + dbp++; + } + for (p=dbp ; *p!='\0' && *p!='(' && !isspace ((int) *p) && *p!=')' ; p++) + vStringPut (name, *p); + vStringTerminate (name); + + if (vStringLength (name) > 0) + makeSimpleTag (name, LispKinds, K_FUNCTION); + vStringClear (name); +} + +/* Algorithm adapted from from GNU etags. + */ +static void findLispTags (void) +{ + vString *name = vStringNew (); + const unsigned char* p; + + + while ((p = fileReadLine ()) != NULL) + { + if (*p == '(') + { + if (L_isdef (p)) + { + while (*p != '\0' && !isspace ((int) *p)) + p++; + while (isspace ((int) *p)) + p++; + L_getit (name, p); + } + else + { + /* Check for (foo::defmumble name-defined ... 
*/ + do + p++; + while (*p != '\0' && !isspace ((int) *p) + && *p != ':' && *p != '(' && *p != ')'); + if (*p == ':') + { + do + p++; + while (*p == ':'); + + if (L_isdef (p - 1)) + { + while (*p != '\0' && !isspace ((int) *p)) + p++; + while (isspace (*p)) + p++; + L_getit (name, p); + } + } + } + } + } + vStringDelete (name); +} + +extern parserDefinition* LispParser (void) +{ + static const char *const extensions [] = { + "cl", "clisp", "el", "l", "lisp", "lsp", NULL + }; + parserDefinition* def = parserNew ("Lisp"); + def->kinds = LispKinds; + def->kindCount = KIND_COUNT (LispKinds); + def->extensions = extensions; + def->parser = findLispTags; + return def; +} + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/third_party/ctags/lregex.c b/third_party/ctags/lregex.c new file mode 100644 index 000000000..1bf6a645c --- /dev/null +++ b/third_party/ctags/lregex.c @@ -0,0 +1,713 @@ +// clang-format off +/* +* $Id: lregex.c 747 2009-11-06 02:33:37Z dhiebert $ +* +* Copyright (c) 2000-2003, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains functions for applying regular expression matching. +* +* The code for utlizing the Gnu regex package with regards to processing the +* regex option and checking for regex matches was adapted from routines in +* Gnu etags. 
+*/
+
+/*
+*   INCLUDE FILES
+*/
+#include "third_party/ctags/general.h"  /* must always come first */
+
+#include "libc/mem/alg.h"
+#include "libc/str/str.h"
+
+#ifdef HAVE_REGCOMP
+#include "libc/str/str.h"
+
+# ifdef HAVE_SYS_TYPES_H
+#include "libc/calls/makedev.h"
+#include "libc/calls/weirdtypes.h"
+#include "libc/thread/thread.h"
+#include "libc/calls/typedef/u.h"
+#include "libc/calls/weirdtypes.h"
+#include "libc/intrin/newbie.h"
+#include "libc/sock/select.h"
+#include "libc/sysv/consts/endian.h"  /* declare off_t (not known to regex.h on FreeBSD) */
+# endif
+#include "third_party/regex/regex.h"
+#endif
+
+#include "third_party/ctags/debug.h"
+#include "third_party/ctags/entry.h"
+#include "third_party/ctags/parse.h"
+#include "third_party/ctags/read.h"
+#include "third_party/ctags/routines.h"
+
+#ifdef HAVE_REGEX
+
+/*
+*   MACROS
+*/
+
+/* Back-references \0 through \9 */
+#define BACK_REFERENCE_COUNT 10
+
+#if defined (HAVE_REGCOMP) && !defined (REGCOMP_BROKEN)
+# define POSIX_REGEX
+#endif
+
+#define REGEX_NAME "Regex"
+
+/*
+*   DATA DECLARATIONS
+*/
+#if defined (POSIX_REGEX)
+
+struct sKind {
+    boolean enabled;
+    char letter;
+    char* name;
+    char* description;
+};
+
+enum pType { PTRN_TAG, PTRN_CALLBACK };
+
+typedef struct {
+    regex_t *pattern;
+    enum pType type;
+    union {
+        struct {
+            char *name_pattern;
+            struct sKind kind;
+        } tag;
+        struct {
+            regexCallback function;
+        } callback;
+    } u;
+} regexPattern;
+
+#endif
+
+typedef struct {
+    regexPattern *patterns;
+    unsigned int count;
+} patternSet;
+
+/*
+*   DATA DEFINITIONS
+*/
+
+static boolean regexBroken = FALSE;
+
+/* Array of pattern sets, indexed by language */
+static patternSet* Sets = NULL;
+static int SetUpper = -1;  /* upper language index in list */
+
+/*
+*   FUNCTION DEFINITIONS
+*/
+
+/* Frees every pattern registered for `language' (compiled regex, name
+ * pattern, kind name and description) and leaves its set empty. Languages
+ * above SetUpper have no set and are ignored.
+ */
+static void clearPatternSet (const langType language)
+{
+    if (language <= SetUpper)
+    {
+        patternSet* const set = Sets + language;
+        unsigned int i;
+        for (i = 0  ;  i < set->count  ;  ++i)
+        {
+            regexPattern *p = &set->patterns [i];
+#if defined (POSIX_REGEX)
+            regfree (p->pattern);
+#endif
+            eFree (p->pattern);
+            p->pattern = NULL;
+
+            if (p->type == PTRN_TAG)
+            {
+                eFree (p->u.tag.name_pattern);
+                p->u.tag.name_pattern = NULL;
+                eFree (p->u.tag.kind.name);
+                p->u.tag.kind.name = NULL;
+                if (p->u.tag.kind.description != NULL)
+                {
+                    eFree (p->u.tag.kind.description);
+                    p->u.tag.kind.description = NULL;
+                }
+            }
+        }
+        if (set->patterns != NULL)
+            eFree (set->patterns);
+        set->patterns = NULL;
+        set->count = 0;
+    }
+}
+
+/*
+*   Regex pseudo-parser
+*/
+
+/* Emits a tag entry named `name' with the letter and name of `kind',
+ * unless that kind is disabled.
+ */
+static void makeRegexTag (
+        const vString* const name, const struct sKind* const kind)
+{
+    if (kind->enabled)
+    {
+        tagEntryInfo e;
+        Assert (name != NULL  &&  vStringLength (name) > 0);
+        Assert (kind != NULL);
+        initTagEntry (&e, vStringValue (name));
+        e.kind     = kind->letter;
+        e.kindName = kind->name;
+        makeTagEntry (&e);
+    }
+}
+
+/*
+*   Regex pattern definition
+*/
+
+/* Take a string like "/blah/" and turn it into "blah", making sure
+ * that the first and last characters are the same, and handling
+ * quoted separator characters.  Actually, stops on the occurrence of
+ * an unquoted separator.  Also turns "\t" into a Tab character.
+ * Returns pointer to terminating separator.  Works in place.  Null
+ * terminates name string.
+ */
+static char* scanSeparators (char* name)
+{
+    char sep = name [0];
+    char *copyto = name;
+    boolean quoted = FALSE;
+
+    for (++name ; *name != '\0' ; ++name)
+    {
+        if (quoted)
+        {
+            if (*name == sep)
+                *copyto++ = sep;
+            else if (*name == 't')
+                *copyto++ = '\t';
+            else
+            {
+                /* Something else is quoted, so preserve the quote. */
+                *copyto++ = '\\';
+                *copyto++ = *name;
+            }
+            quoted = FALSE;
+        }
+        else if (*name == '\\')
+            quoted = TRUE;
+        else if (*name == sep)
+        {
+            break;
+        }
+        else
+            *copyto++ = *name;
+    }
+    *copyto = '\0';
+    return name;
+}
+
+/* Parse `regexp', in form "/regex/name/[k,Kind/]flags" (where the separator
+ * character is whatever the first character of `regexp' is), by breaking it
+ * up into null terminated strings, removing the separators, and expanding
+ * '\t' into tabs. When complete, `regexp' points to the line matching
+ * pattern, a pointer to the name matching pattern is written to `name', a
+ * pointer to the kinds is written to `kinds' (possibly NULL), and a pointer
+ * to the trailing flags is written to `flags'. If the pattern is not in the
+ * correct format, a false value is returned.
+ *
+ * `kinds' and `flags' are only written when TRUE is returned.
+ */
+static boolean parseTagRegex (
+        char* const regexp, char** const name,
+        char** const kinds, char** const flags)
+{
+    boolean result = FALSE;
+    const int separator = (unsigned char) regexp [0];
+
+    *name = scanSeparators (regexp);
+    if (*regexp == '\0')
+        error (WARNING, "empty regexp");
+    else if (**name != separator)
+        error (WARNING, "%s: incomplete regexp", regexp);
+    else
+    {
+        char* const third = scanSeparators (*name);
+        if (**name == '\0')
+            error (WARNING, "%s: regexp missing name pattern", regexp);
+        /* only inspect the last character once the name is known to be
+         * non-empty; strlen () - 1 would otherwise index before the string */
+        else if ((*name) [strlen (*name) - 1] == '\\')
+            error (WARNING, "error in name pattern: \"%s\"", *name);
+        if (*third != separator)
+            error (WARNING, "%s: regexp missing final separator", regexp);
+        else
+        {
+            char* const fourth = scanSeparators (third);
+            if (*fourth == separator)
+            {
+                *kinds = third;
+                scanSeparators (fourth);
+                *flags = fourth;
+            }
+            else
+            {
+                *flags = third;
+                *kinds = NULL;
+            }
+            result = TRUE;
+        }
+    }
+    return result;
+}
+
+/* Grows Sets so that it covers `language' (new sets start empty), appends
+ * one slot to that language's pattern list and returns a pointer to it.
+ * The slot's fields are left for the caller to fill in.
+ */
+static regexPattern* appendPattern (const langType language)
+{
+    patternSet* set;
+    if (language > SetUpper)
+    {
+        int i;
+        Sets = xRealloc (Sets, (language + 1), patternSet);
+        for (i = SetUpper + 1  ;  i <= language  ;  ++i)
+        {
+            Sets [i].patterns = NULL;
+            Sets [i].count = 0;
+        }
+        SetUpper = language;
+    }
+    set = Sets + language;
+    set->patterns = xRealloc (set->patterns, (set->count + 1), regexPattern);
+    set->count += 1;
+    return &set->patterns [set->count - 1];
+}
+
+/* Registers a tag-producing pattern for `language'. The set takes ownership
+ * of `pattern', `name', `kindName' and `description'; clearPatternSet ()
+ * frees them. The kind starts out enabled.
+ */
+static void addCompiledTagPattern (
+        const langType language, regex_t* const pattern,
+        char* const name, const char kind, char* const kindName,
+        char *const description)
+{
+    regexPattern *const ptrn = appendPattern (language);
+
+    ptrn->pattern = pattern;
+    ptrn->type    = PTRN_TAG;
+    ptrn->u.tag.name_pattern = name;
+    ptrn->u.tag.kind.enabled = TRUE;
+    ptrn->u.tag.kind.letter  = kind;
+    ptrn->u.tag.kind.name    = kindName;
+    ptrn->u.tag.kind.description = description;
+}
+
+/* Registers a pattern for `language' whose matches are passed to
+ * `callback'. The set takes ownership of `pattern'.
+ */
+static void addCompiledCallbackPattern (
+        const langType language, regex_t* const pattern,
+        const regexCallback callback)
+{
+    regexPattern *const ptrn = appendPattern (language);
+
+    ptrn->pattern = pattern;
+    ptrn->type    = PTRN_CALLBACK;
+    ptrn->u.callback.function = callback;
+}
+
+#if defined (POSIX_REGEX)
+
+/* Compiles `regexp' and returns a newly allocated regex_t, or NULL (after a
+ * warning) when compilation fails. The default is extended syntax with
+ * REG_NEWLINE; each character of `flags' (may be NULL) adjusts that:
+ * 'b' basic syntax, 'e' extended syntax, 'i' ignore case. Any other flag
+ * character is reported and skipped.
+ */
+static regex_t* compileRegex (const char* const regexp, const char* const flags)
+{
+    int cflags = REG_EXTENDED | REG_NEWLINE;
+    regex_t *result = NULL;
+    int errcode;
+    int i;
+    for (i = 0  ;  flags != NULL  &&  flags [i] != '\0'  ;  ++i)
+    {
+        switch ((int) flags [i])
+        {
+            case 'b': cflags &= ~REG_EXTENDED; break;
+            case 'e': cflags |= REG_EXTENDED;  break;
+            case 'i': cflags |= REG_ICASE;     break;
+            /* report the offending character, not the first flag */
+            default: error (WARNING, "unknown regex flag: '%c'", flags [i]); break;
+        }
+    }
+    result = xMalloc (1, regex_t);
+    errcode = regcomp (result, regexp, cflags);
+    if (errcode != 0)
+    {
+        char errmsg[256];
+        regerror (errcode, result, errmsg, sizeof errmsg);
+        error (WARNING, "regcomp %s: %s", regexp, errmsg);
+        /* no regfree () here: the contents of a regex_t are undefined after
+         * a failed regcomp (), so only our own allocation is released */
+        eFree (result);
+        result = NULL;
+    }
+    return result;
+}
+
+#endif
+
+/* Splits a kind specification of the form "[letter][,name[,description]]".
+ * `*kind' receives the letter ('r' when none is given), `*kindName' a newly
+ * allocated name ("regex" when none is given) and `*description' a newly
+ * allocated description or NULL.
+ */
+static void parseKinds (
+        const char* const kinds, char* const kind, char** const kindName,
+        char **description)
+{
+    *kind = '\0';
+    *kindName = NULL;
+    *description = NULL;
+    if (kinds == NULL  ||  kinds [0] == '\0')
+    {
+        *kind = 'r';
+        *kindName = eStrdup ("regex");
+    }
+    else if (kinds [0] != '\0')
+    {
+        const char* k = kinds;
+        if (k [0] != ','  &&  (k [1] == ','  ||  k [1] == '\0'))
+            *kind = *k++;
+        else
+            *kind = 'r';
+        if (*k == ',')
+            ++k;
+        if (k [0] == '\0')
+            *kindName = eStrdup ("regex");
+        else
+        {
+            const char *const comma = strchr (k, ',');
+            if (comma == NULL)
+                *kindName = eStrdup (k);
+            else
+            {
+                *kindName = (char*) eMalloc (comma - k + 1);
+                strncpy (*kindName, k, comma - k);
+                (*kindName) [comma - k] = '\0';
+                k = comma + 1;
+                if (k [0] != '\0')
+                    *description = eStrdup (k);
+            }
+        }
+    }
+}
+
+/* Prints one line describing the kind of tag pattern `pat [i]': its letter
+ * ('?' if unset), its description (or its name when there is none) and an
+ * " [off]" marker when the kind is disabled.
+ */
+static void printRegexKind (const regexPattern *pat, unsigned int i, boolean indent)
+{
+    const struct sKind *const kind = &pat [i].u.tag.kind;
+    const char *const indentation = indent ? " " : "";
+    Assert (pat [i].type == PTRN_TAG);
+    printf ("%s%c %s %s\n", indentation,
+            kind->letter != '\0' ? kind->letter : '?',
+            kind->description != NULL ? kind->description : kind->name,
+            kind->enabled ? "" : " [off]");
+}
+
+/* Handles the parameter of a --regex-<LANG> option: an empty parameter
+ * clears the language's patterns, "@file" adds one pattern per line of
+ * `file', and anything else is taken as a single pattern definition.
+ */
+static void processLanguageRegex (const langType language,
+        const char* const parameter)
+{
+    if (parameter == NULL  ||  parameter [0] == '\0')
+        clearPatternSet (language);
+    else if (parameter [0] != '@')
+        addLanguageRegex (language, parameter);
+    else if (! doesFileExist (parameter + 1))
+        error (WARNING, "cannot open regex file");
+    else
+    {
+        const char* regexfile = parameter + 1;
+        FILE* const fp = fopen (regexfile, "r");
+        if (fp == NULL)
+            error (WARNING | PERROR, "%s", regexfile);
+        else
+        {
+            vString* const regex = vStringNew ();
+            while (readLine (regex, fp))
+                addLanguageRegex (language, vStringValue (regex));
+            fclose (fp);
+            vStringDelete (regex);
+        }
+    }
+}
+
+/*
+*   Regex pattern matching
+*/
+
+#if defined (POSIX_REGEX)
+
+/* Expands the name pattern `out' against the matched line `in' and returns
+ * the result as a new vString. "\N" with N in 1..9 is replaced by the text
+ * of sub-match N (nothing if it did not take part in the match); "\0" and
+ * out-of-range digits expand to nothing. A backslash before any other
+ * character is dropped and the character kept. Newline and carriage-return
+ * characters are dropped. A backslash at the very end of `out' is ignored.
+ */
+static vString* substitute (
+        const char* const in, const char* out,
+        const int nmatch, const regmatch_t* const pmatch)
+{
+    vString* result = vStringNew ();
+    const char* p;
+    for (p = out  ;  *p != '\0'  ;  p++)
+    {
+        if (*p == '\\')
+        {
+            ++p;
+            if (*p == '\0')
+                break;  /* dangling backslash: do not step past the terminator */
+            if (isdigit ((unsigned char) *p))
+            {
+                const int dig = *p - '0';
+                if (0 < dig  &&  dig < nmatch  &&  pmatch [dig].rm_so != -1)
+                {
+                    const int diglen = pmatch [dig].rm_eo - pmatch [dig].rm_so;
+                    vStringNCatS (result, in + pmatch [dig].rm_so, diglen);
+                }
+                continue;
+            }
+        }
+        if (*p != '\n'  &&  *p != '\r')
+            vStringPut (result, *p);
+    }
+    vStringTerminate (result);
+    return result;
+}
+
+/* Expands the pattern's name template against `line', trims whitespace at
+ * both ends and emits a tag for the result. An empty result produces a
+ * warning instead of a tag.
+ */
+static void matchTagPattern (const vString* const line,
+        const regexPattern* const patbuf,
+        const regmatch_t* const pmatch)
+{
+    vString *const name = substitute (vStringValue (line),
+            patbuf->u.tag.name_pattern, BACK_REFERENCE_COUNT, pmatch);
+    vStringStripLeading (name);
+    vStringStripTrailing (name);
+    if (vStringLength (name) > 0)
+        makeRegexTag (name, &patbuf->u.tag.kind);
+    else
+        error (WARNING, "%s:%ld: null expansion of name pattern \"%s\"",
+            getInputFileName (), getInputLineNumber (),
+            patbuf->u.tag.name_pattern);
+    vStringDelete (name);
+}
+
+/* Converts the leading run of valid sub-matches (it stops at the first one
+ * that did not take part in the match) into start/length pairs and passes
+ * them, with the line text, to the pattern's callback.
+ */
+static void matchCallbackPattern (
+        const vString* const line, const regexPattern* const patbuf,
+        const regmatch_t* const pmatch)
+{
+    regexMatch matches [BACK_REFERENCE_COUNT];
+    unsigned int count = 0;
+    int i;
+    for (i = 0  ;  i < BACK_REFERENCE_COUNT  &&  pmatch [i].rm_so != -1  ;  ++i)
+    {
+        matches [i].start  = pmatch [i].rm_so;
+        matches [i].length = pmatch [i].rm_eo - pmatch [i].rm_so;
+        ++count;
+    }
+    patbuf->u.callback.function (vStringValue (line), matches, count);
+}
+
+/* Runs one pattern against `line' and, on a match, dispatches to the tag or
+ * callback handler. Returns TRUE when the pattern matched.
+ */
+static boolean matchRegexPattern (const vString* const line,
+        const regexPattern* const patbuf)
+{
+    boolean result = FALSE;
+    regmatch_t pmatch [BACK_REFERENCE_COUNT];
+    const int match = regexec (patbuf->pattern, vStringValue (line),
+                               BACK_REFERENCE_COUNT, pmatch, 0);
+    if (match == 0)
+    {
+        result = TRUE;
+        if (patbuf->type == PTRN_TAG)
+            matchTagPattern (line, patbuf, pmatch);
+        else if (patbuf->type == PTRN_CALLBACK)
+            matchCallbackPattern (line, patbuf, pmatch);
+        else
+        {
+            Assert ("invalid pattern type" == NULL);
+            result = FALSE;
+        }
+    }
+    return result;
+}
+
+#endif
+
+/* PUBLIC INTERFACE */
+
+/* Match against all patterns for specified language. Returns true if at least
+ * one pattern matched.
+ */
+extern boolean matchRegex (const vString* const line, const langType language)
+{
+    boolean result = FALSE;
+    if (language != LANG_IGNORE  &&  language <= SetUpper  &&
+        Sets [language].count > 0)
+    {
+        const patternSet* const set = Sets + language;
+        unsigned int i;
+        for (i = 0  ;  i < set->count  ;  ++i)
+            if (matchRegexPattern (line, set->patterns + i))
+                result = TRUE;
+    }
+    return result;
+}
+
+extern void findRegexTags (void)
+{
+    /* merely read all lines of the file */
+    while (fileReadLine () != NULL)
+        ;
+}
+
+#endif  /* HAVE_REGEX */
+
+/* Compiles `regex' with `flags' and registers it as a tag pattern for
+ * `language', using a copy of `name' as the name template and `kinds' as
+ * the kind specification. Does nothing when regex support is unavailable
+ * or broken, or when the pattern fails to compile.
+ */
+extern void addTagRegex (
+        const langType language __unused,
+        const char* const regex __unused,
+        const char* const name __unused,
+        const char* const kinds __unused,
+        const char* const flags __unused)
+{
+#ifdef HAVE_REGEX
+    Assert (regex != NULL);
+    Assert (name != NULL);
+    if (! regexBroken)
+    {
+        regex_t* const cp = compileRegex (regex, flags);
+        if (cp != NULL)
+        {
+            char kind;
+            char* kindName;
+            char* description;
+            parseKinds (kinds, &kind, &kindName, &description);
+            addCompiledTagPattern (language, cp, eStrdup (name),
+                    kind, kindName, description);
+        }
+    }
+#endif
+}
+
+/* Compiles `regex' with `flags' and registers it for `language' so that
+ * each match invokes `callback'.
+ */
+extern void addCallbackRegex (
+        const langType language __unused,
+        const char* const regex __unused,
+        const char* const flags __unused,
+        const regexCallback callback __unused)
+{
+#ifdef HAVE_REGEX
+    Assert (regex != NULL);
+    if (! regexBroken)
+    {
+        regex_t* const cp = compileRegex (regex, flags);
+        if (cp != NULL)
+            addCompiledCallbackPattern (language, cp, callback);
+    }
+#endif
+}
+
+/* Parses a "/regex/name/[kinds/]flags" definition and registers it for
+ * `language'. `regex' itself is not modified; parsing works on a private
+ * copy that is released whether or not the definition is well-formed.
+ */
+extern void addLanguageRegex (
+        const langType language __unused, const char* const regex __unused)
+{
+#ifdef HAVE_REGEX
+    if (! regexBroken)
+    {
+        char *const regex_pat = eStrdup (regex);
+        char *name, *kinds, *flags;
+        if (parseTagRegex (regex_pat, &name, &kinds, &flags))
+            addTagRegex (language, regex_pat, name, kinds, flags);
+        eFree (regex_pat);
+    }
+#endif
+}
+
+/*
+*   Regex option parsing
+*/
+
+/* Handles an option of the form "regex-<LANG>". Returns TRUE when `option'
+ * was recognized as such (even if the language is unknown or regex support
+ * is missing, both of which only produce a warning).
+ */
+extern boolean processRegexOption (const char *const option,
+                                   const char *const parameter __unused)
+{
+    boolean handled = FALSE;
+    const char* const dash = strchr (option, '-');
+    if (dash != NULL  &&  strncmp (option, "regex", dash - option) == 0)
+    {
+#ifdef HAVE_REGEX
+        langType language;
+        language = getNamedLanguage (dash + 1);
+        if (language == LANG_IGNORE)
+            error (WARNING, "unknown language \"%s\" in --%s option", (dash + 1), option);
+        else
+            processLanguageRegex (language, parameter);
+#else
+        error (WARNING, "regex support not available; required for --%s option",
+           option);
+#endif
+        handled = TRUE;
+    }
+    return handled;
+}
+
+/* Disables the kind of every tag pattern registered for `language'. */
+extern void disableRegexKinds (const langType language __unused)
+{
+#ifdef HAVE_REGEX
+    if (language <= SetUpper  &&  Sets [language].count > 0)
+    {
+        patternSet* const set = Sets + language;
+        unsigned int i;
+        for (i = 0  ;  i < set->count  ;  ++i)
+            if (set->patterns [i].type == PTRN_TAG)
+                set->patterns [i].u.tag.kind.enabled = FALSE;
+    }
+#endif
+}
+
+/* Sets the enabled state of every tag pattern of `language' whose kind
+ * letter is `kind' to `mode'. Returns TRUE if at least one pattern carried
+ * that letter.
+ */
+extern boolean enableRegexKind (
+        const langType language __unused,
+        const int kind __unused, const boolean mode __unused)
+{
+    boolean result = FALSE;
+#ifdef HAVE_REGEX
+    if (language <= SetUpper  &&  Sets [language].count > 0)
+    {
+        patternSet* const set = Sets + language;
+        unsigned int i;
+        for (i = 0  ;  i < set->count  ;  ++i)
+            if (set->patterns [i].type == PTRN_TAG &&
+                set->patterns [i].u.tag.kind.letter == kind)
+            {
+                set->patterns [i].u.tag.kind.enabled = mode;
+                result = TRUE;
+            }
+    }
+#endif
+    return result;
+}
+
+/* Prints one line per tag pattern registered for `language'. */
+extern void printRegexKinds (const langType language __unused, boolean indent __unused)
+{
+#ifdef HAVE_REGEX
+    if (language <= SetUpper  &&  Sets [language].count > 0)
+    {
+        patternSet* const set = Sets + language;
+        unsigned int i;
+        for (i = 0  ;  i < set->count  ;  ++i)
+            if (set->patterns [i].type == PTRN_TAG)
+                printRegexKind (set->patterns, i, indent);
+    }
+#endif
+}
+
+/* Releases every pattern set and the set array itself. */
+extern void freeRegexResources (void)
+{
+#ifdef HAVE_REGEX
+    int i;
+    for (i = 0  ;  i <= SetUpper  ;  ++i)
+        clearPatternSet (i);
+    if (Sets != NULL)
+        eFree (Sets);
+    Sets = NULL;
+    SetUpper = -1;
+#endif
+}
+
+/* Check for broken regcomp() on Cygwin */
+extern void checkRegex (void)
+{
+#if defined (HAVE_REGEX) && defined (CHECK_REGCOMP)
+    regex_t patbuf;
+    if (regcomp (&patbuf, "/hello/", 0) != 0)
+    {
+        error (WARNING, "Disabling broken regex");
+        regexBroken = TRUE;
+    }
+    else
+        regfree (&patbuf);  /* the probe pattern is not needed afterwards */
+#endif
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/third_party/ctags/lua.c b/third_party/ctags/lua.c
new file mode 100644
index 000000000..5958b6605
--- /dev/null
+++ b/third_party/ctags/lua.c
@@ -0,0 +1,135 @@
+// clang-format off
+/*
+* $Id: lua.c 443 2006-05-30 04:37:13Z darren $
+*
+* Copyright (c) 2000-2001, Max Ischenko .
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* This module contains functions for generating tags for Lua language.
+*/
+
+/*
+* INCLUDE FILES
+*/
+#include "third_party/ctags/general.h" /* must always come first */
+
+#include "libc/mem/alg.h"
+#include "libc/str/str.h"
+
+#include "third_party/ctags/options.h"
+#include "third_party/ctags/parse.h"
+#include "third_party/ctags/read.h"
+#include "third_party/ctags/vstring.h"
+
+/*
+* DATA DEFINITIONS
+*/
+typedef enum {
+    K_FUNCTION
+} luaKind;
+
+static kindOption LuaKinds [] = {
+    { TRUE, 'f', "function", "functions" }
+};
+
+/*
+* FUNCTION DEFINITIONS
+*/
+
+/* for debugging purposes: writes the characters in [p, q) and a newline
+ * to errout */
+static void __unused print_string (char *p, char *q)
+{
+    for ( ; p != q; p++)
+        fprintf (errout, "%c", *p);
+    fprintf (errout, "\n");
+}
+
+/*
+ * Helper function.
+ * Returns TRUE if line looks like a line of Lua code, i.e. after leading
+ * whitespace it is neither empty nor the start of a `--' comment.
+ *
+ * TODO: Recognize UNIX bang notation.
+ * (Lua treat first line as a comment if it starts with #!)
+ *
+ */
+static boolean is_a_code_line (const unsigned char *line)
+{
+    boolean result;
+    const unsigned char *p = line;
+    while (isspace ((int) *p))
+        p++;
+    if (p [0] == '\0')
+        result = FALSE;
+    else if (p [0] == '-' && p [1] == '-')
+        result = FALSE;
+    else
+        result = TRUE;
+    return result;
+}
+
+/*
+ * Emit a function tag for the text in [begin, end) with surrounding
+ * whitespace removed.  `end' points at the delimiter that follows the name
+ * (the callers pass the position of '(' or '='), so trailing blanks are
+ * trimmed by looking at end [-1]; testing *end itself would never trim
+ * anything and leave names such as "foo ".
+ * Nothing is emitted when a pointer is NULL or the trimmed range is empty.
+ */
+static void extract_name (const char *begin, const char *end, vString *name)
+{
+    if (begin != NULL && end != NULL && begin < end)
+    {
+        const char *cp;
+
+        /* isspace() requires an unsigned char value (or EOF) */
+        while (begin < end && isspace ((unsigned char) *begin))
+            begin++;
+        while (end > begin && isspace ((unsigned char) end [-1]))
+            end--;
+        if (begin < end)
+        {
+            for (cp = begin ; cp != end; cp++)
+                vStringPut (name, (int) *cp);
+            vStringTerminate (name);
+
+            makeSimpleTag (name, LuaKinds, K_FUNCTION);
+            vStringClear (name);
+        }
+    }
+}
+
+/*
+ * Scan the input line by line.  For every code line that contains the word
+ * "function":
+ *   - if the line has no '=', the name is the text between the keyword and
+ *     the first following '(';
+ *   - otherwise the name is the text from the start of the line up to the
+ *     first '='.
+ */
+static void findLuaTags (void)
+{
+    vString *name = vStringNew ();
+    const unsigned char *line;
+
+    while ((line = fileReadLine ()) != NULL)
+    {
+        const char *p, *q;
+
+        if (! is_a_code_line (line))
+            continue;
+
+        p = (const char*) strstr ((const char*) line, "function");
+        if (p == NULL)
+            continue;
+
+        q = strchr ((const char*) line, '=');
+
+        if (q == NULL) {
+            /* skip exactly the `function' word; skipping one byte more
+             * could step over the terminating NUL when the keyword ends
+             * the line.  extract_name() drops the separating blanks. */
+            p = p + (sizeof ("function") - 1);
+            q = strchr ((const char*) p, '(');
+            extract_name (p, q, name);
+        } else {
+            p = (const char*) &line[0];
+            extract_name (p, q, name);
+        }
+    }
+    vStringDelete (name);
+}
+
+/* Build the parser definition for Lua: "lua" extension, LuaKinds, and
+ * findLuaTags as the parsing routine. */
+extern parserDefinition* LuaParser (void)
+{
+    static const char* const extensions [] = { "lua", NULL };
+    parserDefinition* def = parserNew ("Lua");
+    def->kinds = LuaKinds;
+    def->kindCount = KIND_COUNT (LuaKinds);
+    def->extensions = extensions;
+    def->parser = findLuaTags;
+    return def;
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/third_party/ctags/main.c b/third_party/ctags/main.c
new file mode 100644
index 000000000..bb9e545cd
--- /dev/null
+++ b/third_party/ctags/main.c
@@ -0,0 +1,393 @@
+/*
+ * $Id: main.c 536 2007-06-02 06:09:00Z elliotth $
+ *
+ * Copyright (c) 1996-2003, Darren Hiebert
+ *
+ * Author: Darren Hiebert
+ * http://ctags.sourceforge.net
+ *
+ * This source code is released for free distribution under the terms of the
+ * GNU General Public License. It is provided on an as-is basis and no
+ * responsibility is accepted for its failure to perform as expected.
+ *
+ * This is a reimplementation of the ctags (1) program. It is an attempt to
+ * provide a fully featured ctags program which is free of the limitations
+ * which most (all?) others are subject to.
+ *
+ * This module contains the start-up code and routines to determine the list
+ * of files to parsed for tags.
+ */
+#include "libc/runtime/runtime.h"
+#include "third_party/ctags/general.h" /* must always come first */
+/**/
+#include "libc/calls/struct/dirent.h"
+#include "libc/calls/weirdtypes.h"
+#include "libc/mem/alg.h"
+#include "libc/str/str.h"
+#include "libc/time/time.h"
+#include "third_party/ctags/debug.h"
+#include "third_party/ctags/keyword.h"
+#include "third_party/ctags/main.h"
+#include "third_party/ctags/options.h"
+#include "third_party/ctags/read.h"
+#include "third_party/ctags/routines.h"
+// clang-format off
+
+/*
+* MACROS
+*/
+#define plural(value) (((unsigned long)(value) == 1L) ? "" : "s")
+
+/*
+* DATA DEFINITIONS
+*/
+static struct { long files, lines, bytes; } Totals = { 0, 0, 0 };
+
+/*
+* FUNCTION PROTOTYPES
+*/
+static boolean createTagsForEntry (const char *const entryName);
+
+/*
+* FUNCTION DEFINITIONS
+*/
+
+/* Add the given counts to the running Totals. */
+extern void addTotals (
+        const unsigned int files, const long unsigned int lines,
+        const long unsigned int bytes)
+{
+    Totals.files += files;
+    Totals.lines += lines;
+    Totals.bytes += bytes;
+}
+
+/* Returns TRUE when output goes to stdout: in xref or filter mode, or when
+ * the tag file name is "-" or "/dev/stdout". */
+extern boolean isDestinationStdout (void)
+{
+    boolean toStdout = FALSE;
+
+    if (Option.xref || Option.filter ||
+        (Option.tagFileName != NULL && (strcmp (Option.tagFileName, "-") == 0
+            || strcmp (Option.tagFileName, "/dev/stdout") == 0
+        )))
+        toStdout = TRUE;
+    return toStdout;
+}
+
+/* Run createTagsForEntry() on every entry of `dirName' except "." and "..".
+ * Entries of "." are passed by bare name, all others as dirName combined
+ * with the entry name.  Returns the OR of the per-entry results; a
+ * directory that cannot be opened only produces a warning. */
+static boolean recurseUsingOpendir (const char *const dirName)
+{
+    boolean resize = FALSE;
+    DIR *const dir = opendir (dirName);
+    if (dir == NULL)
+        error (WARNING | PERROR, "cannot recurse into directory \"%s\"", dirName);
+    else
+    {
+        struct dirent *entry;
+        while ((entry = readdir (dir)) != NULL)
+        {
+            if (strcmp (entry->d_name, ".") != 0 &&
+                strcmp (entry->d_name, "..") != 0)
+            {
+                vString *filePath;
+                if (strcmp (dirName, ".") == 0)
+                    filePath = vStringNewInit (entry->d_name);
+                else
+                    filePath = combinePathAndFile (dirName, entry->d_name);
+                resize |= createTagsForEntry (vStringValue (filePath));
+                vStringDelete (filePath);
+            }
+        }
+        closedir (dir);
+    }
+    return resize;
+}
+
+
+/* Descend into `dirName' unless it is a recursive link or recursion is not
+ * enabled (Option.recurse); both skipped cases are reported via verbose(). */
+static boolean recurseIntoDirectory (const char *const dirName)
+{
+    boolean resize = FALSE;
+    if (isRecursiveLink (dirName))
+        verbose ("ignoring \"%s\" (recursive link)\n", dirName);
+    else if (! Option.recurse)
+        verbose ("ignoring \"%s\" (directory)\n", dirName);
+    else
+    {
+        verbose ("RECURSING into directory \"%s\"\n", dirName);
+        resize = recurseUsingOpendir (dirName);
+    }
+    return resize;
+}
+
+/* Dispatch one path: excluded files, unfollowed symbolic links, missing
+ * files and special files are skipped (with a message); directories go to
+ * recurseIntoDirectory(), normal files to parseFile().  Returns the result
+ * of whichever of the two ran, FALSE otherwise. */
+static boolean createTagsForEntry (const char *const entryName)
+{
+    boolean resize = FALSE;
+    fileStatus *status;
+
+    /* check the argument before it is handed to eStat() */
+    Assert (entryName != NULL);
+    status = eStat (entryName);
+    if (isExcludedFile (entryName))
+        verbose ("excluding \"%s\"\n", entryName);
+    else if (status->isSymbolicLink && ! Option.followLinks)
+        verbose ("ignoring \"%s\" (symbolic link)\n", entryName);
+    else if (! status->exists)
+        error (WARNING | PERROR, "cannot open source file \"%s\"", entryName);
+    else if (status->isDirectory)
+        resize = recurseIntoDirectory (entryName);
+    else if (! status->isNormalFile)
+        verbose ("ignoring \"%s\" (special file)\n", entryName);
+    else
+        resize = parseFile (entryName);
+
+    eStatFree (status);
+    return resize;
+}
+
+#ifdef MANUAL_GLOBBING
+
+/* Expand a wildcard command-line argument ourselves (platforms whose shell
+ * does not glob) and generate tags for the matches. */
+static boolean createTagsForWildcardArg (const char *const arg)
+{
+    boolean resize = FALSE;
+    vString *const pattern = vStringNewInit (arg);
+
+#if defined (HAVE_FINDFIRST) || defined (HAVE__FINDFIRST)
+    /* We must transform the "." and ".." forms into something that can
+     * be expanded by the findfirst/_findfirst functions.
+     */
+    if (Option.recurse &&
+        (strcmp (vStringValue (pattern), ".") == 0 ||
+         strcmp (vStringValue (pattern), "..") == 0))
+    {
+        vStringPut (pattern, OUTPUT_PATH_SEPARATOR);
+        vStringCatS (pattern, "*.*");
+    }
+    /* Fetch the buffer only now: the appends above grow the string and
+     * may move its storage, so a pointer taken before them could be
+     * stale. */
+    resize |= createTagsForWildcardUsingFindfirst (vStringValue (pattern));
+#endif
+    vStringDelete (pattern);
+    return resize;
+}
+
+#endif
+
+/* Generate tags for the remaining command-line arguments; options found
+ * between file names are parsed as they are reached. */
+static boolean createTagsForArgs (cookedArgs *const args)
+{
+    boolean resize = FALSE;
+
+    /* Generate tags for each argument on the command line.
+     */
+    while (! cArgOff (args))
+    {
+        const char *const arg = cArgItem (args);
+
+#ifdef MANUAL_GLOBBING
+        resize |= createTagsForWildcardArg (arg);
+#else
+        resize |= createTagsForEntry (arg);
+#endif
+        cArgForth (args);
+        parseOptions (args);
+    }
+    return resize;
+}
+
+/* Read from an opened file a list of file names for which to generate tags.
+ * A NULL `fp' yields FALSE.  In filter mode the filter terminator (if any)
+ * is written to stdout and stdout is flushed after each entry.
+ */
+static boolean createTagsFromFileInput (FILE *const fp, const boolean filter)
+{
+    boolean resize = FALSE;
+    if (fp != NULL)
+    {
+        cookedArgs *args = cArgNewFromLineFile (fp);
+        parseOptions (args);
+        while (! cArgOff (args))
+        {
+            resize |= createTagsForEntry (cArgItem (args));
+            if (filter)
+            {
+                if (Option.filterTerminator != NULL)
+                    fputs (Option.filterTerminator, stdout);
+                fflush (stdout);
+            }
+            cArgForth (args);
+            parseOptions (args);
+        }
+        cArgDelete (args);
+    }
+    return resize;
+}
+
+/* Read from a named file a list of file names for which to generate tags.
+ */
+static boolean createTagsFromListFile (const char *const fileName)
+{
+    boolean resize = FALSE;
+    Assert (fileName != NULL);
+    if (strcmp (fileName, "-") == 0)
+        resize = createTagsFromFileInput (stdin, FALSE);
+    else
+    {
+        FILE *const fp = fopen (fileName, "r");
+        if (fp == NULL)
+            error (FATAL | PERROR, "cannot open list file \"%s\"", fileName);
+        else
+        {
+            /* only touch the stream when the open succeeded, so a NULL
+             * FILE* can never reach fclose() */
+            resize = createTagsFromFileInput (fp, FALSE);
+            fclose (fp);
+        }
+    }
+    return resize;
+}
+
+#if defined (HAVE_CLOCK)
+# define CLOCK_AVAILABLE
+# ifndef CLOCKS_PER_SEC
+# define CLOCKS_PER_SEC 1000000
+# endif
+#elif defined (HAVE_TIMES)
+# define CLOCK_AVAILABLE
+# define CLOCKS_PER_SEC 60
+/* clock() substitute built on times(): user plus system time */
+static clock_t clock (void)
+{
+    struct tms buf;
+
+    times (&buf);
+    return (buf.tms_utime + buf.tms_stime);
+}
+#else
+# define clock() (clock_t)0
+#endif
+
+/* Write the run statistics to errout.  `timeStamps' holds three clock
+ * readings: [0] before scanning, [1] after scanning, [2] after the tag file
+ * was closed; they are only used when CLOCK_AVAILABLE is defined. */
+static void printTotals (const clock_t *const timeStamps)
+{
+    const unsigned long totalTags = TagFile.numTags.added +
+                                    TagFile.numTags.prev;
+
+    fprintf (errout, "%ld file%s, %ld line%s (%ld kB) scanned",
+            Totals.files, plural (Totals.files),
+            Totals.lines, plural (Totals.lines),
+            Totals.bytes/1024L);
+#ifdef CLOCK_AVAILABLE
+    {
+        const double interval = ((double) (timeStamps [1] - timeStamps [0])) /
+                                CLOCKS_PER_SEC;
+
+        fprintf (errout, " in %.01f seconds", interval);
+        /* avoid dividing by a zero interval */
+        if (interval != (double) 0.0)
+            fprintf (errout, " (%lu kB/s)",
+                    (unsigned long) (Totals.bytes / interval) / 1024L);
+    }
+#endif
+    fputc ('\n', errout);
+
+    fprintf (errout, "%lu tag%s added to tag file",
+            TagFile.numTags.added, plural (TagFile.numTags.added));
+    if (Option.append)
+        fprintf (errout, " (now %lu tags)", totalTags);
+    fputc ('\n', errout);
+
+    if (totalTags > 0 && Option.sorted != SO_UNSORTED)
+    {
+        fprintf (errout, "%lu tag%s sorted", totalTags, plural (totalTags));
+#ifdef CLOCK_AVAILABLE
+        fprintf (errout, " in %.02f seconds",
+                ((double) (timeStamps [2] - timeStamps [1])) / CLOCKS_PER_SEC);
+#endif
+        fputc ('\n', errout);
+    }
+
+#ifdef DEBUG
+    fprintf (errout, "longest tag line = %lu\n",
+            (unsigned long) TagFile.max.line);
+#endif
+}
+
+/* TRUE when running in etags mode with an include file requested. */
+static boolean etagsInclude (void)
+{
+    return (boolean)(Option.etags && Option.etagsInclude != NULL);
+}
+
+/* Drive tag generation from, in order: the command-line arguments, the list
+ * file, filter input on stdin, and - when no input was named but recursion
+ * is enabled - the current directory.  The tag file is opened and closed
+ * here except in filter mode. */
+static void makeTags (cookedArgs *args)
+{
+    clock_t timeStamps [3];
+    boolean resize = FALSE;
+    boolean files = (boolean)(! cArgOff (args) || Option.fileList != NULL
+                              || Option.filter);
+
+    if (! files)
+    {
+        if (filesRequired ())
+            error (FATAL, "No files specified. Try \"%s --help\".",
+                getExecutableName ());
+        else if (! Option.recurse && ! etagsInclude ())
+            return;
+    }
+
+#define timeStamp(n) timeStamps[(n)]=(Option.printTotals ? clock():(clock_t)0)
+    if (! Option.filter)
+        openTagFile ();
+
+    timeStamp (0);
+
+    if (! cArgOff (args))
+    {
+        verbose ("Reading command line arguments\n");
+        resize = createTagsForArgs (args);
+    }
+    if (Option.fileList != NULL)
+    {
+        verbose ("Reading list file\n");
+        resize = (boolean) (createTagsFromListFile (Option.fileList) || resize);
+    }
+    if (Option.filter)
+    {
+        verbose ("Reading filter input\n");
+        resize = (boolean) (createTagsFromFileInput (stdin, TRUE) || resize);
+    }
+    if (! files && Option.recurse)
+        resize = recurseIntoDirectory (".");
+
+    timeStamp (1);
+
+    if (! Option.filter)
+        closeTagFile (resize);
+
+    timeStamp (2);
+
+    if (Option.printTotals)
+        printTotals (timeStamps);
+#undef timeStamp
+}
+
+/*
+ * Start up code
+ */
+
+extern int main (int __unused argc, char **argv)
+{
+    cookedArgs *args;
+
+    setCurrentDirectory ();
+    setExecutableName (*argv++);
+    checkRegex ();
+
+    args = cArgNewFromArgv (argv);
+    previewFirstOption (args);
+    testEtagsInvocation ();
+    initializeParsing ();
+    initOptions ();
+    readOptionConfiguration ();
+    verbose ("Reading initial options from command line\n");
+    parseOptions (args);
+    checkOptions ();
+    makeTags (args);
+
+    /* Clean up.
+     */
+    cArgDelete (args);
+    freeKeywordTable ();
+    freeRoutineResources ();
+    freeSourceFileResources ();
+    freeTagFileResources ();
+    freeOptionResources ();
+    freeParserResources ();
+    freeRegexResources ();
+
+    exit (0);
+    return 0;
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/third_party/ctags/main.h b/third_party/ctags/main.h
new file mode 100644
index 000000000..7e3a6b112
--- /dev/null
+++ b/third_party/ctags/main.h
@@ -0,0 +1,40 @@
+// clang-format off
+/*
+* $Id: main.h 443 2006-05-30 04:37:13Z darren $
+*
+* Copyright (c) 1998-2002, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* External interface to main.c
+*/
+#ifndef _MAIN_H
+#define _MAIN_H
+
+/*
+* INCLUDE FILES
+*/
+#include "third_party/ctags/general.h" /* must always come first */
+
+#include "libc/calls/calls.h"
+#include "libc/calls/dprintf.h"
+#include "libc/calls/weirdtypes.h"
+#include "libc/fmt/fmt.h"
+#include "libc/mem/fmt.h"
+#include "libc/stdio/stdio.h"
+#include "libc/stdio/temp.h"
+#include "third_party/musl/tempnam.h"
+
+#include "third_party/ctags/vstring.h"
+
+/*
+* FUNCTION PROTOTYPES
+*/
+extern void addTotals (const unsigned int files, const long unsigned int lines, const long unsigned int bytes);
+extern boolean isDestinationStdout (void);
+extern int main (int argc, char **argv);
+
+#endif /* _MAIN_H */
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/third_party/ctags/make.c b/third_party/ctags/make.c
new file mode 100644
index 000000000..410a271a6
--- /dev/null
+++ b/third_party/ctags/make.c
@@ -0,0 +1,219 @@
+// clang-format off
+/*
+* $Id: make.c 751 2010-02-27 17:41:57Z elliotth $
+*
+* Copyright (c) 2000-2005, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* This module contains functions for generating tags for makefiles.
+*/
+
+/*
+* INCLUDE FILES
+*/
+#include "third_party/ctags/general.h" /* must always come first */
+
+#include "libc/mem/alg.h"
+#include "libc/str/str.h"
+#include "libc/str/str.h"
+
+#include "third_party/ctags/options.h"
+#include "third_party/ctags/parse.h"
+#include "third_party/ctags/read.h"
+#include "third_party/ctags/vstring.h"
+
+/*
+* DATA DEFINITIONS
+*/
+typedef enum {
+    K_MACRO
+} shKind;
+
+static kindOption MakeKinds [] = {
+    { TRUE, 'm', "macro", "macros"}
+};
+
+/*
+* FUNCTION DEFINITIONS
+*/
+
+/* Read the next input character.  A backslash is dropped and the character
+ * after it returned instead; a backslash-newline pair is skipped entirely
+ * (line continuation). */
+static int nextChar (void)
+{
+    int c = fileGetc ();
+    if (c == '\\')
+    {
+        c = fileGetc ();
+        if (c == '\n')
+            c = fileGetc ();
+    }
+    return c;
+}
+
+/* Consume input up to the end of the line; the newline itself is pushed
+ * back so the caller still sees it. */
+static void skipLine (void)
+{
+    int c;
+    do
+        c = nextChar ();
+    while (c != EOF && c != '\n');
+    if (c == '\n')
+        fileUngetc (c);
+}
+
+/* Return the next character that is not whitespace; a newline is returned
+ * rather than skipped. */
+static int skipToNonWhite (void)
+{
+    int c;
+    do
+        c = nextChar ();
+    while (c != '\n' && isspace (c));
+    return c;
+}
+
+/* TRUE for characters allowed in a name: alphanumerics, '.', '-' and '_'.
+ * NUL is rejected explicitly because strchr() would match the terminator. */
+static boolean isIdentifier (int c)
+{
+    return (boolean)(c != '\0' && (isalnum (c) || strchr (".-_", c) != NULL));
+}
+
+/* Collect identifier characters, starting with `first', into `id'; the
+ * first non-identifier character is pushed back. */
+static void readIdentifier (const int first, vString *const id)
+{
+    int c = first;
+    vStringClear (id);
+    while (isIdentifier (c))
+    {
+        vStringPut (id, c);
+        c = nextChar ();
+    }
+    fileUngetc (c);
+    vStringTerminate (id);
+}
+
+/* Skip to the closer matching an already consumed opener.  `pair' holds the
+ * opening and closing character.  Scanning stops at end of line or EOF;
+ * only hitting EOF is reported. */
+static void skipToMatch (const char *const pair)
+{
+    const int begin = pair [0], end = pair [1];
+    const unsigned long inputLineNumber = getInputLineNumber ();
+    int matchLevel = 1;
+    int c = '\0';
+
+    while (matchLevel > 0)
+    {
+        c = nextChar ();
+        if (c == begin)
+            ++matchLevel;
+        else if (c == end)
+            --matchLevel;
+        else if (c == '\n' || c == EOF)
+            break;
+    }
+    if (c == EOF)
+        verbose ("%s: failed to find match for '%c' at line %lu\n",
+                getInputFileName (), begin, inputLineNumber);
+}
+
+/* Scan a makefile character by character and emit a macro tag for every
+ * `define NAME' and for every `NAME =', `NAME :=' or `NAME ?=' assignment
+ * (optionally prefixed by `export').  `NAME +=' lines are skipped without
+ * a tag.  Tab-indented lines following a rule are skipped as recipe text. */
+static void findMakeTags (void)
+{
+    vString *name = vStringNew ();
+    boolean newline = TRUE;
+    boolean in_define = FALSE;
+    boolean in_rule = FALSE;
+    boolean variable_possible = TRUE;
+    int c;
+
+    while ((c = nextChar ()) != EOF)
+    {
+        if (newline)
+        {
+            if (in_rule)
+            {
+                if (c == '\t')
+                {
+                    skipLine (); /* skip rule */
+                    continue;
+                }
+                else
+                    in_rule = FALSE;
+            }
+            variable_possible = (boolean)(!in_rule);
+            newline = FALSE;
+        }
+        if (c == '\n')
+            newline = TRUE;
+        else if (isspace (c))
+            continue;
+        else if (c == '#')
+            skipLine ();
+        else if (c == '(')
+            skipToMatch ("()");
+        else if (c == '{')
+            skipToMatch ("{}");
+        else if (c == ':')
+        {
+            variable_possible = TRUE;
+            in_rule = TRUE;
+        }
+        else if (variable_possible && isIdentifier (c))
+        {
+            readIdentifier (c, name);
+            if (strcmp (vStringValue (name), "endef") == 0)
+                in_define = FALSE;
+            else if (in_define)
+                skipLine ();
+            else if (strcmp (vStringValue (name), "define") == 0 &&
+                isIdentifier (c))
+            {
+                in_define = TRUE;
+                c = skipToNonWhite ();
+                readIdentifier (c, name);
+                makeSimpleTag (name, MakeKinds, K_MACRO);
+                skipLine ();
+            }
+            else {
+                if (strcmp(vStringValue (name), "export") == 0 &&
+                    isIdentifier (c))
+                {
+                    c = skipToNonWhite ();
+                    readIdentifier (c, name);
+                }
+                c = skipToNonWhite ();
+                /* exclude NUL: strchr() also matches the terminator of
+                 * the set string, which would treat a stray NUL byte as
+                 * an assignment operator prefix */
+                if (c != '\0' && strchr (":?+", c) != NULL)
+                {
+                    boolean append = (boolean)(c == '+');
+                    if (c == ':')
+                        in_rule = TRUE;
+                    c = nextChar ();
+                    if (c != '=')
+                        fileUngetc (c);
+                    else if (append)
+                    {
+                        skipLine ();
+                        continue;
+                    }
+                }
+                if (c == '=')
+                {
+                    makeSimpleTag (name, MakeKinds, K_MACRO);
+                    in_rule = FALSE;
+                    skipLine ();
+                }
+            }
+        }
+        else
+            variable_possible = FALSE;
+    }
+    vStringDelete (name);
+}
+
+/* Build the parser definition for makefiles: matched by file name pattern
+ * or by the "mak"/"mk" extensions, parsed by findMakeTags. */
+extern parserDefinition* MakefileParser (void)
+{
+    static const char *const patterns [] = { "[Mm]akefile", "GNUmakefile", NULL };
+    static const char *const extensions [] = { "mak", "mk", NULL };
+    parserDefinition* const def = parserNew ("Make");
+    def->kinds = MakeKinds;
+    def->kindCount = KIND_COUNT (MakeKinds);
+    def->patterns = patterns;
+    def->extensions = extensions;
+    def->parser = findMakeTags;
+    return def;
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/third_party/ctags/matlab.c
b/third_party/ctags/matlab.c
new file mode 100644
index 000000000..9dfb193ad
--- /dev/null
+++ b/third_party/ctags/matlab.c
@@ -0,0 +1,52 @@
+// clang-format off
+/*
+* $Id$
+*
+* Copyright (c) 2008, David Fishburn
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* This module contains functions for generating tags for MATLAB language files.
+*/
+
+/*
+* INCLUDE FILES
+*/
+#include "third_party/ctags/general.h" /* must always come first */
+
+#include "libc/mem/alg.h"
+#include "libc/str/str.h"
+#include "third_party/ctags/parse.h"
+
+/*
+* FUNCTION DEFINITIONS
+*/
+
+/* Register the three tag regexes for MATLAB function headers.  Each one
+ * captures the function name as \1 and files it under kind "f,function".
+ */
+static void installMatLabRegex (const langType language)
+{
+    /* function [x,y,z] = asdf */
+    addTagRegex (language, "^function[ \t]*\\[.*\\][ \t]*=[ \t]*([a-zA-Z0-9_]+)", "\\1", "f,function", NULL);
+    /* function x = asdf */
+    addTagRegex (language, "^function[ \t]*[a-zA-Z0-9_]+[ \t]*=[ \t]*([a-zA-Z0-9_]+)", "\\1", "f,function", NULL);
+    /* function asdf */
+    addTagRegex (language, "^function[ \t]*([a-zA-Z0-9_]+)[^=]*$", "\\1", "f,function", NULL);
+}
+
+/* Build the parser definition for MATLAB: "m" extension, flagged as
+ * regex-based, with installMatLabRegex as its initializer.
+ */
+extern parserDefinition* MatLabParser (void)
+{
+    static const char *const extensions [] = { "m", NULL };
+    parserDefinition* const def = parserNew ("MatLab");
+    def->extensions = extensions;
+    def->initialize = installMatLabRegex;
+    def->regex = TRUE;
+    return def;
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/third_party/ctags/objc.c b/third_party/ctags/objc.c
new file mode 100644
index 000000000..664436033
--- /dev/null
+++ b/third_party/ctags/objc.c
@@ -0,0 +1,1149 @@
+// clang-format off
+
+/*
+* Copyright (c) 2010, Vincent Berthoux
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* This module contains functions for generating tags for Objective C
+* language files.
+*/
+/*
+* INCLUDE FILES
+*/
+#include "third_party/ctags/general.h" /* must always come first */
+
+#include "libc/mem/alg.h"
+#include "libc/str/str.h"
+
+#include "third_party/ctags/keyword.h"
+#include "third_party/ctags/entry.h"
+#include "third_party/ctags/options.h"
+#include "third_party/ctags/read.h"
+#include "third_party/ctags/routines.h"
+#include "third_party/ctags/vstring.h"
+
+/* To get rid of unused parameter warning in
+ * -Wextra */
+#ifdef UNUSED
+#elif defined(__GNUC__)
+# define UNUSED(x) UNUSED_ ## x __attribute__((unused))
+#elif defined(__LCLINT__)
+# define UNUSED(x) /*@unused@*/ x
+#else
+# define UNUSED(x) x
+#endif
+
+/* Tag kinds; the enumerators index ObjcKinds[] below, so both lists must
+ * stay in the same order. */
+typedef enum {
+    K_INTERFACE,
+    K_IMPLEMENTATION,
+    K_PROTOCOL,
+    K_METHOD,
+    K_CLASSMETHOD,
+    K_VAR,
+    K_FIELD,
+    K_FUNCTION,
+    K_PROPERTY,
+    K_TYPEDEF,
+    K_STRUCT,
+    K_ENUM,
+    K_MACRO
+} objcKind;
+
+/* Kind letters must be unique within a language: protocol uses 'P' so that
+ * it no longer collides with property ('p'). */
+static kindOption ObjcKinds[] = {
+    {TRUE, 'i', "interface", "class interface"},
+    {TRUE, 'I', "implementation", "class implementation"},
+    {TRUE, 'P', "protocol", "Protocol"},
+    {TRUE, 'm', "method", "Object's method"},
+    {TRUE, 'c', "class", "Class' method"},
+    {TRUE, 'v', "var", "Global variable"},
+    {TRUE, 'F', "field", "Object field"},
+    {TRUE, 'f', "function", "A function"},
+    {TRUE, 'p', "property", "A property"},
+    {TRUE, 't', "typedef", "A type alias"},
+    {TRUE, 's', "struct", "A type structure"},
+    {TRUE, 'e', "enum", "An enumeration"},
+    {TRUE, 'M', "macro", "A preprocessor macro"},
+};
+
+/* Keyword ids and lexer tokens share one enumeration (see objcToken). */
+typedef enum {
+    ObjcTYPEDEF,
+    ObjcSTRUCT,
+    ObjcENUM,
+    ObjcIMPLEMENTATION,
+    ObjcINTERFACE,
+    ObjcPROTOCOL,
+    ObjcENCODE,
+    ObjcSYNCHRONIZED,
+    ObjcSELECTOR,
+    ObjcPROPERTY,
+    ObjcEND,
+    ObjcDEFS,
+    ObjcCLASS,
+    ObjcPRIVATE,
+    ObjcPACKAGE,
+    ObjcPUBLIC,
+    ObjcPROTECTED,
+    ObjcSYNTHESIZE,
+    ObjcDYNAMIC,
+    ObjcOPTIONAL,
+    ObjcREQUIRED,
+    ObjcSTRING,
+    ObjcIDENTIFIER,
+
+    Tok_COMA, /* ',' */
+    Tok_PLUS, /* '+' */
+    Tok_MINUS, /* '-' */
+    Tok_PARL, /* '(' */
+    Tok_PARR, /* ')' */
+    Tok_CurlL, /* '{' */
+    Tok_CurlR, /* '}' */
+    Tok_SQUAREL, /* '[' */
+    Tok_SQUARER, /* ']' */
+    Tok_semi, /* ';' */
+    Tok_dpoint, /* ':' */
+    Tok_Sharp, /* '#' */
+    Tok_Backslash, /* '\\' */
+    Tok_EOL, /* '\r''\n' */
+    Tok_any,
+
+    Tok_EOF /* END of file */
+} objcKeyword;
+
+typedef objcKeyword objcToken;
+
+typedef struct sOBjcKeywordDesc {
+    const char *name;
+    objcKeyword id;
+} objcKeywordDesc;
+
+
+/* Spelling -> keyword id; registered by initKeywordHash(). */
+static const objcKeywordDesc objcKeywordTable[] = {
+    {"typedef", ObjcTYPEDEF},
+    {"struct", ObjcSTRUCT},
+    {"enum", ObjcENUM},
+    {"@implementation", ObjcIMPLEMENTATION},
+    {"@interface", ObjcINTERFACE},
+    {"@protocol", ObjcPROTOCOL},
+    {"@encode", ObjcENCODE},
+    {"@property", ObjcPROPERTY},
+    {"@synchronized", ObjcSYNCHRONIZED},
+    {"@selector", ObjcSELECTOR},
+    {"@end", ObjcEND},
+    {"@defs", ObjcDEFS},
+    {"@class", ObjcCLASS},
+    {"@private", ObjcPRIVATE},
+    {"@package", ObjcPACKAGE},
+    {"@public", ObjcPUBLIC},
+    {"@protected", ObjcPROTECTED},
+    {"@synthesize", ObjcSYNTHESIZE},
+    {"@dynamic", ObjcDYNAMIC},
+    {"@optional", ObjcOPTIONAL},
+    {"@required", ObjcREQUIRED},
+};
+
+static langType Lang_ObjectiveC;
+
+/*//////////////////////////////////////////////////////////////////
+//// lexingInit */
+typedef struct _lexingState {
+    vString *name; /* current parsed identifier/operator */
+    const unsigned char *cp; /* position in stream */
+} lexingState;
+
+/* Register every entry of objcKeywordTable for Lang_ObjectiveC. */
+static void initKeywordHash (void)
+{
+    const size_t count = sizeof (objcKeywordTable) / sizeof (objcKeywordDesc);
+    size_t i;
+
+    for (i = 0; i < count; ++i)
+    {
+        addKeyword (objcKeywordTable[i].name, Lang_ObjectiveC,
+            (int) objcKeywordTable[i].id);
+    }
+}
+
+/*//////////////////////////////////////////////////////////////////////
+//// Lexing */
+static boolean isNum (char c)
+{
+    return c >= '0' && c <= '9';
+}
+
+static boolean isLowerAlpha (char c)
+{
+    return c >= 'a' && c <= 'z';
+}
+
+static boolean isUpperAlpha (char c)
+{
+    return c >= 'A' && c <= 'Z';
+}
+
+static boolean isAlpha (char c)
+{
+    return isLowerAlpha (c) || isUpperAlpha (c);
+}
+
+static boolean isIdent (char c)
+{
+    return isNum (c) || isAlpha (c) || c == '_';
+}
+
+static boolean isSpace (char c)
+{
+    return c == ' ' || c == '\t';
+}
+
+/* Advance st->cp past a run of spaces and tabs. */
+static void eatWhiteSpace (lexingState * st)
+{
+    const unsigned char *cp = st->cp;
+    while (isSpace (*cp))
+        cp++;
+
+    st->cp = cp;
+}
+
+/* Skip a string literal.  st->cp points at the opening quote on entry and
+ * just past the closing quote on return (or at the end of the line if the
+ * string is not terminated there). */
+static void eatString (lexingState * st)
+{
+    boolean lastIsBackSlash = FALSE;
+    boolean unfinished = TRUE;
+    const unsigned char *c = st->cp + 1;
+
+    while (unfinished)
+    {
+        /* end of line should never happen.
+         * we tolerate it */
+        if (c == NULL || c[0] == '\0')
+            break;
+        else if (*c == '"' && !lastIsBackSlash)
+            unfinished = FALSE;
+        else
+            /* a backslash that is itself escaped ("\\") does not escape
+             * the character after it */
+            lastIsBackSlash = (boolean) (*c == '\\' && !lastIsBackSlash);
+
+        c++;
+    }
+
+    st->cp = c;
+}
+
+/* Skip a block comment, reading further input lines as needed.  st->cp
+ * points at the opening slash on entry and just past the closing slash on
+ * return; it is NULL if the input ended inside the comment. */
+static void eatComment (lexingState * st)
+{
+    boolean unfinished = TRUE;
+    boolean lastIsStar = FALSE;
+    const unsigned char *c = st->cp + 2;
+
+    while (unfinished)
+    {
+        /* we've reached the end of the line..
+         * so we have to reload a line... */
+        if (c == NULL || *c == '\0')
+        {
+            st->cp = fileReadLine ();
+            /* WOOPS... no more input...
+             * we return, next lexing read
+             * will be null and ok */
+            if (st->cp == NULL)
+                return;
+            c = st->cp;
+            /* a '*' ending the previous line cannot pair with a '/'
+             * starting this one */
+            lastIsStar = FALSE;
+        }
+        /* we've reached the end of the comment */
+        else if (*c == '/' && lastIsStar)
+        {
+            /* consume the closing '/' too, otherwise the lexer would see
+             * it again and could mistake it for the start of another
+             * comment when followed by '*' or '/' */
+            c++;
+            unfinished = FALSE;
+        }
+        else
+        {
+            lastIsStar = '*' == *c;
+            c++;
+        }
+    }
+
+    st->cp = c;
+}
+
+/* Read an identifier starting at st->cp into st->name and move st->cp to
+ * the first character after it. */
+static void readIdentifier (lexingState * st)
+{
+    const unsigned char *p;
+    vStringClear (st->name);
+
+    /* first char is a simple letter */
+    if (isAlpha (*st->cp) || *st->cp == '_')
+        vStringPut (st->name, (int) *st->cp);
+
+    /* Go till you get identifier chars */
+    for (p = st->cp + 1; isIdent (*p); p++)
+        vStringPut (st->name, (int) *p);
+
+    st->cp = p;
+
+    vStringTerminate (st->name);
+}
+
+/* read the @something directives */
+static void readIdentifierObjcDirective (lexingState * st)
+{
+    const unsigned char *p;
+    vStringClear (st->name);
+
+    /* first char is the '@' introducing the directive */
+    if (*st->cp == '@')
+        vStringPut (st->name, (int) *st->cp);
+
+    /* Go till you get identifier chars */
+    for (p = st->cp + 1; isIdent (*p); p++)
+        vStringPut (st->name, (int) *p);
+
+    st->cp = p;
+
+    vStringTerminate (st->name);
+}
+
+/* The lexer is in charge of reading the file.
+ * Some of sub-lexer (like eatComment) also read file.
+ * lexing is finished when the lexer return Tok_EOF */
+static objcKeyword lex (lexingState * st)
+{
+    int retType;
+
+    /* handling data input here: when the current line is exhausted, load
+     * the next one and report the line break (or the end of input) */
+    if (st->cp == NULL || st->cp[0] == '\0')
+    {
+        st->cp = fileReadLine ();
+        if (st->cp == NULL)
+            return Tok_EOF;
+
+        return Tok_EOL;
+    }
+
+    /* identifiers may start with '_' as well as a letter; readIdentifier()
+     * already accepts both */
+    if (isAlpha (*st->cp) || *st->cp == '_')
+    {
+        readIdentifier (st);
+        retType = lookupKeyword (vStringValue (st->name), Lang_ObjectiveC);
+
+        if (retType == -1) /* If it's not a keyword */
+        {
+            return ObjcIDENTIFIER;
+        }
+        else
+        {
+            return retType;
+        }
+    }
+    else if (*st->cp == '@')
+    {
+        readIdentifierObjcDirective (st);
+        retType = lookupKeyword (vStringValue (st->name), Lang_ObjectiveC);
+
+        if (retType == -1) /* If it's not a keyword */
+        {
+            return Tok_any;
+        }
+        else
+        {
+            return retType;
+        }
+    }
+    else if (isSpace (*st->cp))
+    {
+        eatWhiteSpace (st);
+        return lex (st);
+    }
+    else
+        switch (*st->cp)
+        {
+        case '(':
+            st->cp++;
+            return Tok_PARL;
+
+        case '\\':
+            st->cp++;
+            return Tok_Backslash;
+
+        case '#':
+            st->cp++;
+            return Tok_Sharp;
+
+        case '/':
+            if (st->cp[1] == '*') /* ergl, a comment */
+            {
+                eatComment (st);
+                return lex (st);
+            }
+            else if (st->cp[1] == '/')
+            {
+                /* line comment: drop the rest of the line */
+                st->cp = NULL;
+                return lex (st);
+            }
+            else
+            {
+                st->cp++;
+                return Tok_any;
+            }
+            break;
+
+        case ')':
+            st->cp++;
+            return Tok_PARR;
+        case '{':
+            st->cp++;
+            return Tok_CurlL;
+        case '}':
+            st->cp++;
+            return Tok_CurlR;
+        case '[':
+            st->cp++;
+            return Tok_SQUAREL;
+        case ']':
+            st->cp++;
+            return Tok_SQUARER;
+        case ',':
+            st->cp++;
+            return Tok_COMA;
+        case ';':
+            st->cp++;
+            return Tok_semi;
+        case ':':
+            st->cp++;
+            return Tok_dpoint;
+        case '"':
+            eatString (st);
+            return Tok_any;
+        case '+':
+            st->cp++;
+            return Tok_PLUS;
+        case '-':
+            st->cp++;
+            return Tok_MINUS;
+
+        default:
+            st->cp++;
+            break;
+        }
+
+    /* default return if nothing is recognized,
+     * shouldn't happen, but at least, it will
+     * be handled without destroying the parsing. */
+    return Tok_any;
+}
+
+/*//////////////////////////////////////////////////////////////////////
+//// Parsing */
+typedef void (*parseNext) (vString * const ident, objcToken what);
+
+/********** Helpers */
+/* This variable hold the 'parser' which is going to
+ * handle the next token */
+static parseNext toDoNext;
+
+/* Special variable used by parser eater to
+ * determine which action to put after their
+ * job is finished. */
+static parseNext comeAfter;
+
+/* Used by some parsers detecting certain token
+ * to revert to previous parser. */
+static parseNext fallback;
+
+
+/********** Grammar */
+static void globalScope (vString * const ident, objcToken what);
+static void parseMethods (vString * const ident, objcToken what);
+static void parseImplemMethods (vString * const ident, objcToken what);
+static vString *tempName = NULL;
+static vString *parentName = NULL;
+static objcKind parentType = K_INTERFACE;
+
+/* Fill in `tag' for `name' with the given kind.  Kept separate in case
+ * there is a need to add additional information to the tag.  Scope
+ * information is attached only while an enclosing context is active:
+ * popEnclosingContext() empties parentName, and an empty name must not be
+ * emitted as a scope. */
+static void prepareTag (tagEntryInfo * tag, vString const *name, objcKind kind)
+{
+    initTagEntry (tag, vStringValue (name));
+    tag->kindName = ObjcKinds[kind].name;
+    tag->kind = ObjcKinds[kind].letter;
+
+    if (parentName != NULL && vStringLength (parentName) > 0)
+    {
+        tag->extensionFields.scope[0] = ObjcKinds[parentType].name;
+        tag->extensionFields.scope[1] = vStringValue (parentName);
+    }
+}
+
+/* Record `parent' (of kind `type') as the scope of subsequent tags. */
+void pushEnclosingContext (const vString * parent, objcKind type)
+{
+    vStringCopy (parentName, parent);
+    parentType = type;
+}
+
+/* Leave the current scope by emptying the recorded parent name. */
+void popEnclosingContext (void)
+{
+    vStringClear (parentName);
+}
+
+/* Used to centralise tag creation, and be able to add
+ * more information to it in the future */
+static void addTag (vString * const ident, int kind)
+{
+    tagEntryInfo toCreate;
+    prepareTag (&toCreate, ident, kind);
+    makeTagEntry (&toCreate);
+}
+
+objcToken waitedToken, fallBackToken;
+
+/* Ignore everything till waitedToken and jump to comeAfter.
+ * If the "end" keyword is encountered break, doesn't remember
+ * why though. */
+static void tillToken (vString * const UNUSED (ident), objcToken what)
+{
+    if (what == waitedToken)
+        toDoNext = comeAfter;
+}
+
+/* Like tillToken, but seeing fallBackToken first switches to the
+ * `fallback' handler instead. */
+static void tillTokenOrFallBack (vString * const UNUSED (ident), objcToken what)
+{
+    if (what == waitedToken)
+        toDoNext = comeAfter;
+    else if (what == fallBackToken)
+    {
+        toDoNext = fallback;
+    }
+}
+
+/* Skip a bracketed region: openers ( { [ raise the nesting count, closers
+ * lower it, and once it is back at zero control passes to comeAfter.  The
+ * count is kept in a static, so it persists between calls. */
+static void ignoreBalanced (vString * const UNUSED (ident), objcToken what)
+{
+    static int count = 0;
+
+    switch (what)
+    {
+    case Tok_PARL:
+    case Tok_CurlL:
+    case Tok_SQUAREL:
+        count++;
+        break;
+
+    case Tok_PARR:
+    case Tok_CurlR:
+    case Tok_SQUARER:
+        count--;
+        break;
+
+    default:
+        /* don't care */
+        break;
+    }
+
+    if (count == 0)
+        toDoNext = comeAfter;
+}
+
+/* Instance variable block of an interface: the last identifier seen before
+ * each ';' is tagged as a field; '}' returns to parseMethods. */
+static void parseFields (vString * const ident, objcToken what)
+{
+    switch (what)
+    {
+    case Tok_CurlR:
+        toDoNext = &parseMethods;
+        break;
+
+    /* NOTE(review): unlike parseStructMembers and parseImplemMethods, the
+     * opening token is not forwarded to ignoreBalanced() here, so it is
+     * not counted - confirm whether that is intentional. */
+    case Tok_SQUAREL:
+    case Tok_PARL:
+        toDoNext = &ignoreBalanced;
+        comeAfter = &parseFields;
+        break;
+
+    // we got an identifier, keep track
+    // of it
+    case ObjcIDENTIFIER:
+        vStringCopy (tempName, ident);
+        break;
+
+    // our last kept identifier must be our
+    // variable name =)
+    case Tok_semi:
+        addTag (tempName, K_FIELD);
+        vStringClear (tempName);
+        break;
+
+    default:
+        /* NOTHING */
+        break;
+    }
+}
+
+objcKind methodKind;
+
+
+static vString *fullMethodName;
+static vString *prevIdent;
+
+/* Method declaration inside an interface or protocol.  Parenthesised types
+ * are skipped, each "name:" part is appended to fullMethodName, and at '{'
+ * or ';' the tag is emitted: the accumulated selector if there is one, the
+ * last identifier otherwise. */
+static void parseMethodsName (vString * const ident, objcToken what)
+{
+    switch (what)
+    {
+    case Tok_PARL:
+        toDoNext = &tillToken;
+        comeAfter = &parseMethodsName;
+        waitedToken = Tok_PARR;
+        break;
+
+    case Tok_dpoint:
+        vStringCat (fullMethodName, prevIdent);
+        vStringCatS (fullMethodName, ":");
+        vStringClear (prevIdent);
+        break;
+
+    case ObjcIDENTIFIER:
+        vStringCopy (prevIdent, ident);
+        break;
+
+    case Tok_CurlL:
+    case Tok_semi:
+        // method name is not simple
+        if (vStringLength (fullMethodName) > 0)
+        {
+            addTag (fullMethodName, methodKind);
+            vStringClear (fullMethodName);
+        }
+        else
+            addTag (prevIdent, methodKind);
+
+        /* NOTE(review): the token is forwarded to parseImplemMethods
+         * although the state just selected is parseMethods - confirm. */
+        toDoNext = &parseMethods;
+        parseImplemMethods (ident, what);
+        vStringClear (prevIdent);
+        break;
+
+    default:
+        break;
+    }
+}
+
+/* Same as parseMethodsName, for methods inside an @implementation: the
+ * follow-up state is parseImplemMethods. */
+static void parseMethodsImplemName (vString * const ident, objcToken what)
+{
+    switch (what)
+    {
+    case Tok_PARL:
+        toDoNext = &tillToken;
+        comeAfter = &parseMethodsImplemName;
+        waitedToken = Tok_PARR;
+        break;
+
+    case Tok_dpoint:
+        vStringCat (fullMethodName, prevIdent);
+        vStringCatS (fullMethodName, ":");
+        vStringClear (prevIdent);
+        break;
+
+    case ObjcIDENTIFIER:
+        vStringCopy (prevIdent, ident);
+        break;
+
+    case Tok_CurlL:
+    case Tok_semi:
+        // method name is not simple
+        if (vStringLength (fullMethodName) > 0)
+        {
+            addTag (fullMethodName, methodKind);
+            vStringClear (fullMethodName);
+        }
+        else
+            addTag (prevIdent, methodKind);
+
+        toDoNext = &parseImplemMethods;
+        parseImplemMethods (ident, what);
+        vStringClear (prevIdent);
+        break;
+
+    default:
+        break;
+    }
+}
+
+/* Body of an @implementation: '+' / '-' start a class / instance method,
+ * '{' starts a block that is skipped, @end returns to the global scope. */
+static void parseImplemMethods (vString * const ident, objcToken what)
+{
+    switch (what)
+    {
+    case Tok_PLUS: /* + */
+        toDoNext = &parseMethodsImplemName;
+        methodKind = K_CLASSMETHOD;
+        break;
+
+    case Tok_MINUS: /* - */
+        toDoNext = &parseMethodsImplemName;
+        methodKind = K_METHOD;
+        break;
+
+    case ObjcEND: /* @end */
+        popEnclosingContext ();
+        toDoNext = &globalScope;
+        break;
+
+    case Tok_CurlL: /* { */
+        toDoNext = &ignoreBalanced;
+        ignoreBalanced (ident, what);
+        comeAfter = &parseImplemMethods;
+        break;
+
+    default:
+        break;
+    }
+}
+
+/* After @property: the parenthesised attribute list is skipped and the last
+ * identifier before ';' is tagged as a property. */
+static void parseProperty (vString * const ident, objcToken what)
+{
+    switch (what)
+    {
+    case Tok_PARL:
+        toDoNext = &tillToken;
+        comeAfter = &parseProperty;
+        waitedToken = Tok_PARR;
+        break;
+
+    // we got an identifier, keep track
+    // of it
+    case ObjcIDENTIFIER:
+        vStringCopy (tempName, ident);
+        break;
+
+    // our last kept identifier must be our
+    // variable name =)
+    case Tok_semi:
+        addTag (tempName, K_PROPERTY);
+        vStringClear (tempName);
+        break;
+
+    default:
+        break;
+    }
+}
+
+/* Body of an @interface or @protocol: dispatches on '+', '-', @property,
+ * '{' (instance variable block) and @end. */
+static void parseMethods (vString * const UNUSED (ident), objcToken what)
+{
+    switch (what)
+    {
+    case Tok_PLUS: /* + */
+        toDoNext = &parseMethodsName;
+        methodKind = K_CLASSMETHOD;
+        break;
+
+    case Tok_MINUS: /* - */
+        toDoNext = &parseMethodsName;
+        methodKind = K_METHOD;
+        break;
+
+    case ObjcPROPERTY:
+        toDoNext = &parseProperty;
+        break;
+
+    case ObjcEND: /* @end */
+        popEnclosingContext ();
+        toDoNext = &globalScope;
+        break;
+
+    case Tok_CurlL: /* { */
+        toDoNext = &parseFields;
+        break;
+
+    default:
+        break;
+    }
+}
+
+
+/* Token after @protocol: an identifier opens the protocol scope and is
+ * tagged; in every case parsing continues with parseMethods. */
+static void parseProtocol (vString * const ident, objcToken what)
+{
+    if (what == ObjcIDENTIFIER)
+    {
+        pushEnclosingContext (ident, K_PROTOCOL);
+        addTag (ident, K_PROTOCOL);
+    }
+    toDoNext = &parseMethods;
+}
+
+/* Token after @implementation: an identifier is tagged and becomes the
+ * enclosing scope; parsing continues with parseImplemMethods. */
+static void parseImplementation (vString * const ident, objcToken what)
+{
+    if (what == ObjcIDENTIFIER)
+    {
+        addTag (ident, K_IMPLEMENTATION);
+        pushEnclosingContext (ident, K_IMPLEMENTATION);
+    }
+    toDoNext = &parseImplemMethods;
+}
+
+/* Token after @interface: an identifier is tagged and becomes the enclosing
+ * scope; parsing continues with parseMethods. */
+static void parseInterface (vString * const ident, objcToken what)
+{
+    if (what == ObjcIDENTIFIER)
+    {
+        addTag (ident, K_INTERFACE);
+        pushEnclosingContext (ident, K_INTERFACE);
+    }
+
+    toDoNext = &parseMethods;
+}
+
+/* Members of a struct body: the last identifier before each ';' is tagged
+ * as a field.  While a nested bracketed region is skipped, the caller's
+ * comeAfter is parked in `prev' and restored on the next call. */
+static void parseStructMembers (vString * const ident, objcToken what)
+{
+    static parseNext prev = NULL;
+
+    if (prev != NULL)
+    {
+        comeAfter = prev;
+        prev = NULL;
+    }
+
+    switch (what)
+    {
+    case ObjcIDENTIFIER:
+        vStringCopy (tempName, ident);
+        break;
+
+    case Tok_semi: /* ';' */
+        addTag (tempName, K_FIELD);
+        vStringClear (tempName);
+        break;
+
+    // some types are complex, the only one
+    // we will loose is the function type.
+    case Tok_CurlL: /* '{' */
+    case Tok_PARL: /* '(' */
+    case Tok_SQUAREL: /* '[' */
+        toDoNext = &ignoreBalanced;
+        prev = comeAfter;
+        comeAfter = &parseStructMembers;
+        ignoreBalanced (ident, what);
+        break;
+
+    case Tok_CurlR:
+        toDoNext = comeAfter;
+        break;
+
+    default:
+        /* don't care */
+        break;
+    }
+}
+
+/* Called just after the struct keyword */
+static void parseStruct (vString * const ident, objcToken what)
+{
+    static boolean gotName = FALSE;
+
+    switch (what)
+    {
+    case ObjcIDENTIFIER:
+        if (!gotName)
+        {
+            addTag (ident, K_STRUCT);
+            pushEnclosingContext (ident, K_STRUCT);
+            gotName = TRUE;
+        }
+        else
+        {
+            /* second identifier: the struct was only used as a type,
+             * hand the token on to the pending handler */
+            gotName = FALSE;
+            popEnclosingContext ();
+            toDoNext = comeAfter;
+            comeAfter (ident, what);
+        }
+        break;
+
+    case Tok_CurlL:
+        toDoNext = &parseStructMembers;
+        break;
+
+    /* maybe it was just a forward declaration
+     * in which case, we pop the context */
+    case Tok_semi:
+        if (gotName)
+            popEnclosingContext ();
+
+        toDoNext = comeAfter;
+        comeAfter (ident, what);
+        break;
+
+    default:
+        /* we don't care */
+        break;
+    }
+}
+
+/* Parse enumeration members, ignoring potential initialization */
+static void parseEnumFields (vString * const ident, objcToken what)
+{
+    static parseNext prev = NULL;
+
+    if (prev != NULL)
+    {
+        comeAfter = prev;
+        prev = NULL;
+    }
+
+    switch (what)
+    {
+    case ObjcIDENTIFIER:
+        addTag (ident, K_ENUM);
+        prev = comeAfter;
+        waitedToken = Tok_COMA;
+        /* last item might not have a coma */
+        fallBackToken = Tok_CurlR;
+        fallback = comeAfter;
+        comeAfter = parseEnumFields;
+        toDoNext = &tillTokenOrFallBack;
+        break;
+
+    case Tok_CurlR:
+        toDoNext = comeAfter;
+        popEnclosingContext ();
+        break;
+
+    default:
+        /* don't care */
+        break;
+    }
+}
+
+/* parse enum ... { ...
*/ +static void parseEnum (vString * const ident, objcToken what) +{ + static boolean named = FALSE; + + switch (what) + { + case ObjcIDENTIFIER: + if (!named) + { + addTag (ident, K_ENUM); + pushEnclosingContext (ident, K_ENUM); + named = TRUE; + } + else + { + named = FALSE; + popEnclosingContext (); + toDoNext = comeAfter; + comeAfter (ident, what); + } + break; + + case Tok_CurlL: /* '{' */ + toDoNext = &parseEnumFields; + named = FALSE; + break; + + case Tok_semi: /* ';' */ + if (named) + popEnclosingContext (); + toDoNext = comeAfter; + comeAfter (ident, what); + break; + + default: + /* don't care */ + break; + } +} + +/* Parse something like + * typedef .... ident ; + * ignoring the defined type, except for struct and enum, + * which are handed to parseStruct and parseEnum. + */ +static void parseTypedef (vString * const ident, objcToken what) +{ + switch (what) + { + case ObjcSTRUCT: + toDoNext = &parseStruct; + comeAfter = &parseTypedef; + break; + + case ObjcENUM: + toDoNext = &parseEnum; + comeAfter = &parseTypedef; + break; + + case ObjcIDENTIFIER: + vStringCopy (tempName, ident); + break; + + case Tok_semi: /* ';' */ + addTag (tempName, K_TYPEDEF); + vStringClear (tempName); + toDoNext = &globalScope; + break; + + default: + /* we don't care */ + break; + } +} +/* Swallow the rest of a preprocessor line; a backslash right before the end of line keeps the directive going. */ +static void ignorePreprocStuff (vString * const UNUSED (ident), objcToken what) +{ + static boolean escaped = FALSE; + + switch (what) + { + case Tok_Backslash: + escaped = TRUE; + break; + + case Tok_EOL: + if (escaped) + { + escaped = FALSE; + } + else + { + toDoNext = &globalScope; + } + break; + + default: + escaped = FALSE; + break; + } +} +/* Token that follows a define directive: tagged as K_MACRO when it is an identifier; the rest of the line is skipped either way. */ +static void parseMacroName (vString * const ident, objcToken what) +{ + if (what == ObjcIDENTIFIER) + addTag (ident, K_MACRO); + + toDoNext = &ignorePreprocStuff; +} +/* Token that follows a sharp sign: only the define directive is of interest, anything else is skipped. */ +static void parsePreproc (vString * const ident, objcToken what) +{ + switch (what) + { + case ObjcIDENTIFIER: + if (strcmp (vStringValue (ident), "define") == 0) + toDoNext = &parseMacroName; +
else + toDoNext = &ignorePreprocStuff; + break; + + default: + toDoNext = &ignorePreprocStuff; + break; + } +} + +/* Handle the "strong" top levels, all 'big' declarations + * happen here */ +static void globalScope (vString * const ident, objcToken what) +{ + switch (what) + { + case Tok_Sharp: + toDoNext = &parsePreproc; + break; + + case ObjcSTRUCT: + toDoNext = &parseStruct; + comeAfter = &globalScope; + break; + + case ObjcIDENTIFIER: + /* we keep track of the identifier if we + * come across a function. */ + vStringCopy (tempName, ident); + break; + + case Tok_PARL: + /* if we find an opening parenthesis it means we + * found a function (or a macro...) */ + addTag (tempName, K_FUNCTION); + vStringClear (tempName); + comeAfter = &globalScope; + toDoNext = &ignoreBalanced; + ignoreBalanced (ident, what); + break; + + case ObjcINTERFACE: + toDoNext = &parseInterface; + break; + + case ObjcIMPLEMENTATION: + toDoNext = &parseImplementation; + break; + + case ObjcPROTOCOL: + toDoNext = &parseProtocol; + break; + + case ObjcTYPEDEF: + toDoNext = parseTypedef; + comeAfter = &globalScope; + break; + + case Tok_CurlL: + comeAfter = &globalScope; + toDoNext = &ignoreBalanced; + ignoreBalanced (ident, what); + break; + + case ObjcEND: + case ObjcPUBLIC: + case ObjcPROTECTED: + case ObjcPRIVATE: + + default: + /* we don't care */ + break; + } +} + +/*//////////////////////////////////////////////////////////////// +//// Deal with the system */ +/* Parser entry point: every token of the input is handed to the current state function until Tok_EOF, then all working strings are released. */ +static void findObjcTags (void) +{ + vString *name = vStringNew (); + lexingState st; + objcToken tok; + + parentName = vStringNew (); + tempName = vStringNew (); + fullMethodName = vStringNew (); + prevIdent = vStringNew (); + + st.name = vStringNew (); + st.cp = fileReadLine (); + toDoNext = &globalScope; + tok = lex (&st); + while (tok != Tok_EOF) + { + (*toDoNext) (st.name, tok); + tok = lex (&st); + } + vStringDelete (st.name); /* lexer buffer allocated above; it used to leak once per parsed file */ + vStringDelete (name); + vStringDelete (parentName); + vStringDelete (tempName); + vStringDelete (fullMethodName); +
vStringDelete (prevIdent); + parentName = NULL; + tempName = NULL; + prevIdent = NULL; + fullMethodName = NULL; +} +/* Record the language id handed over by the caller, then call initKeywordHash. */ +static void objcInitialize (const langType language) +{ + Lang_ObjectiveC = language; + + initKeywordHash (); +} +/* Parser definition: the ObjcKinds table, the m and h extensions, and the two callbacks defined above. */ +extern parserDefinition *ObjcParser (void) +{ + static const char *const extensions[] = { "m", "h", NULL }; + parserDefinition *def = parserNew ("ObjectiveC"); + def->kinds = ObjcKinds; + def->kindCount = KIND_COUNT (ObjcKinds); + def->extensions = extensions; + def->parser = findObjcTags; + def->initialize = objcInitialize; + + return def; +} diff --git a/third_party/ctags/ocaml.c b/third_party/ctags/ocaml.c new file mode 100644 index 000000000..fbae12755 --- /dev/null +++ b/third_party/ctags/ocaml.c @@ -0,0 +1,1894 @@ +// clang-format off +/* +* Copyright (c) 2009, Vincent Berthoux +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains functions for generating tags for Objective Caml +* language files.
+*/ +/* +* INCLUDE FILES +*/ +#include "third_party/ctags/general.h" /* must always come first */ + +#include "libc/mem/alg.h" +#include "libc/str/str.h" + +#include "third_party/ctags/keyword.h" +#include "third_party/ctags/entry.h" +#include "third_party/ctags/options.h" +#include "third_party/ctags/read.h" +#include "third_party/ctags/routines.h" +#include "third_party/ctags/vstring.h" + +/* To get rid of unused parameter warning in + * -Wextra */ +#ifdef UNUSED +#elif defined(__GNUC__) +# define UNUSED(x) UNUSED_ ## x __attribute__((unused)) +#elif defined(__LCLINT__) +# define UNUSED(x) /*@unused@*/ x +#else +# define UNUSED(x) x +#endif +#define OCAML_MAX_STACK_SIZE 256 /* capacity of the context stack */ + +typedef enum { + K_CLASS, /* Ocaml class, relatively rare */ + K_METHOD, /* class method */ + K_MODULE, /* Ocaml module OR functor */ + K_VAR, /* global variable */ + K_TYPE, /* name of an OCaml type */ + K_FUNCTION, /* function */ + K_CONSTRUCTOR, /* Constructor of a sum type */ + K_RECORDFIELD, /* field of a record type */ + K_EXCEPTION /* exception */ +} ocamlKind; +/* one entry per ocamlKind value, in the same order; prepareTag and addTag read the enabled, letter and name fields */ +static kindOption OcamlKinds[] = { + {TRUE, 'c', "class", "classes"}, + {TRUE, 'm', "method", "Object's method"}, + {TRUE, 'M', "module", "Module or functor"}, + {TRUE, 'v', "var", "Global variable"}, + {TRUE, 't', "type", "Type name"}, + {TRUE, 'f', "function", "A function"}, + {TRUE, 'C', "Constructor", "A constructor"}, + {TRUE, 'r', "Record field", "A 'structure' field"}, + {TRUE, 'e', "Exception", "An exception"} +}; +/* token ids: the language keywords first, then the punctuation and literal classes produced by lex */ +typedef enum { + OcaKEYWORD_and, + OcaKEYWORD_begin, + OcaKEYWORD_class, + OcaKEYWORD_do, + OcaKEYWORD_done, + OcaKEYWORD_else, + OcaKEYWORD_end, + OcaKEYWORD_exception, + OcaKEYWORD_for, + OcaKEYWORD_functor, + OcaKEYWORD_fun, + OcaKEYWORD_if, + OcaKEYWORD_in, + OcaKEYWORD_let, + OcaKEYWORD_value, + OcaKEYWORD_match, + OcaKEYWORD_method, + OcaKEYWORD_module, + OcaKEYWORD_mutable, + OcaKEYWORD_object, + OcaKEYWORD_of, + OcaKEYWORD_rec, + OcaKEYWORD_sig, + OcaKEYWORD_struct, + OcaKEYWORD_then, + OcaKEYWORD_try, + OcaKEYWORD_type, + OcaKEYWORD_val, + OcaKEYWORD_virtual, +
OcaKEYWORD_while, + OcaKEYWORD_with, + + OcaIDENTIFIER, + Tok_PARL, /* '(' */ + Tok_PARR, /* ')' */ + Tok_BRL, /* '[' */ + Tok_BRR, /* ']' */ + Tok_CurlL, /* '{' */ + Tok_CurlR, /* '}' */ + Tok_Prime, /* '\'' */ + Tok_Pipe, /* '|' */ + Tok_EQ, /* '=' */ + Tok_Val, /* string, number or any other value */ + Tok_Op, /* any operator recognized by the language */ + Tok_semi, /* ';' */ + Tok_comma, /* ',' */ + Tok_To, /* '->' */ + Tok_Sharp, /* '#' */ + Tok_Backslash, /* '\\' */ + + Tok_EOF /* END of file */ +} ocamlKeyword; +/* one row of the keyword table: spelling and the token it maps to */ +typedef struct sOcaKeywordDesc { + const char *name; + ocamlKeyword id; +} ocaKeywordDesc; + +typedef ocamlKeyword ocaToken; +/* NOTE(review): OcaKEYWORD_if has no entry in this table, so the word if presumably comes out of lex as OcaIDENTIFIER; confirm this is intended */ +static const ocaKeywordDesc OcamlKeywordTable[] = { + { "and" , OcaKEYWORD_and }, + { "begin" , OcaKEYWORD_begin }, + { "class" , OcaKEYWORD_class }, + { "do" , OcaKEYWORD_do }, + { "done" , OcaKEYWORD_done }, + { "else" , OcaKEYWORD_else }, + { "end" , OcaKEYWORD_end }, + { "exception" , OcaKEYWORD_exception }, + { "for" , OcaKEYWORD_for }, + { "fun" , OcaKEYWORD_fun }, + { "function" , OcaKEYWORD_fun }, + { "functor" , OcaKEYWORD_functor }, + { "in" , OcaKEYWORD_in }, + { "let" , OcaKEYWORD_let }, + { "match" , OcaKEYWORD_match }, + { "method" , OcaKEYWORD_method }, + { "module" , OcaKEYWORD_module }, + { "mutable" , OcaKEYWORD_mutable }, + { "object" , OcaKEYWORD_object }, + { "of" , OcaKEYWORD_of }, + { "rec" , OcaKEYWORD_rec }, + { "sig" , OcaKEYWORD_sig }, + { "struct" , OcaKEYWORD_struct }, + { "then" , OcaKEYWORD_then }, + { "try" , OcaKEYWORD_try }, + { "type" , OcaKEYWORD_type }, + { "val" , OcaKEYWORD_val }, + { "value" , OcaKEYWORD_value }, /* just to handle revised syntax */ + { "virtual" , OcaKEYWORD_virtual }, + { "while" , OcaKEYWORD_while }, + { "with" , OcaKEYWORD_with }, + + { "or" , Tok_Op }, + { "mod " , Tok_Op }, /* NOTE(review): this literal and the five below end with a blank; readIdentifier never stores a blank, so these six entries presumably never match; confirm */ + { "land " , Tok_Op }, + { "lor " , Tok_Op }, + { "lxor " , Tok_Op }, + { "lsl " , Tok_Op }, + { "lsr " , Tok_Op }, + { "asr" , Tok_Op }, + { "->" , Tok_To }, + { "true" , Tok_Val }, + { "false" , Tok_Val
} +}; + +static langType Lang_Ocaml; +/* when TRUE, localLet, parseLabel and parseOptionnal also tag the identifiers they meet */ +boolean exportLocalInfo = FALSE; + +/*////////////////////////////////////////////////////////////////// +//// lexingInit */ +typedef struct _lexingState { + vString *name; /* current parsed identifier/operator */ + const unsigned char *cp; /* position in stream */ +} lexingState; + +/* one flag per possible char value, TRUE for the characters that can + * make up an operator (filled in by initOperatorTable) */ +boolean isOperator[1 << (8 * sizeof (char))] = { FALSE }; +/* register every OcamlKeywordTable row for the Lang_Ocaml language */ +static void initKeywordHash ( void ) +{ + const size_t count = sizeof (OcamlKeywordTable) / sizeof (ocaKeywordDesc); + size_t i; + + for (i = 0; i < count; ++i) + { + addKeyword (OcamlKeywordTable[i].name, Lang_Ocaml, + (int) OcamlKeywordTable[i].id); + } +} + +/* definition of all the operators in OCaml, + * /!\ certain operators get special treatment + * with regard to their role in the OCaml grammar : + * '|' ':' '=' '~' and '?' */ +static void initOperatorTable ( void ) +{ + isOperator['!'] = TRUE; + isOperator['$'] = TRUE; + isOperator['%'] = TRUE; + isOperator['&'] = TRUE; + isOperator['*'] = TRUE; + isOperator['+'] = TRUE; + isOperator['-'] = TRUE; + isOperator['.'] = TRUE; + isOperator['/'] = TRUE; + isOperator[':'] = TRUE; + isOperator['<'] = TRUE; + isOperator['='] = TRUE; + isOperator['>'] = TRUE; + isOperator['?'] = TRUE; + isOperator['@'] = TRUE; + isOperator['^'] = TRUE; + isOperator['~'] = TRUE; + isOperator['|'] = TRUE; +} + +/*////////////////////////////////////////////////////////////////////// +//// Lexing */ +static boolean isNum (char c) +{ + return c >= '0' && c <= '9'; +} +static boolean isLowerAlpha (char c) +{ + return c >= 'a' && c <= 'z'; +} + +static boolean isUpperAlpha (char c) +{ + return c >= 'A' && c <= 'Z'; +} + +static boolean isAlpha (char c) +{ + return isLowerAlpha (c) || isUpperAlpha (c); +} +/* characters allowed after the first one of an identifier: digits, letters, underscore and the prime */ +static boolean isIdent (char c) +{ + return isNum (c) || isAlpha (c) || c == '_' || c == '\''; +} + +static boolean isSpace (char c) +{ + return c == ' ' || c == '\t' || c == '\r' || c == '\n'; +} + +static
void eatWhiteSpace (lexingState * st) +{ + const unsigned char *cp = st->cp; + while (isSpace (*cp)) + cp++; + + st->cp = cp; +} +/* Skip a string literal: st->cp is on the opening quote, and ends up just past the closing quote or at the end of the line. NOTE(review): a backslash that is itself escaped still marks the next character as escaped, so a string ending in two backslashes looks unterminated; confirm */ +static void eatString (lexingState * st) +{ + boolean lastIsBackSlash = FALSE; + boolean unfinished = TRUE; + const unsigned char *c = st->cp + 1; + + while (unfinished) + { + /* end of line should never happen. + * we tolerate it */ + if (c == NULL || c[0] == '\0') + break; + else if (*c == '"' && !lastIsBackSlash) + unfinished = FALSE; + else + lastIsBackSlash = *c == '\\'; + + c++; + } + + st->cp = c; +} + +static void eatComment (lexingState * st) +{ + boolean unfinished = TRUE; + boolean lastIsStar = FALSE; + const unsigned char *c = st->cp + 2; + + while (unfinished) + { + /* we've reached the end of the line.. + * so we have to reload a line... */ + if (c == NULL || *c == '\0') + { + st->cp = fileReadLine (); + /* WOOPS... no more input... + * we return, next lexing read + * will be null and ok */ + if (st->cp == NULL) + return; + c = st->cp; + } + /* we've reached the end of the comment */ + else if (*c == ')' && lastIsStar) + unfinished = FALSE; + /* here we deal with imbricated comment, which + * are allowed in OCaml */ + else if (c[0] == '(' && c[1] == '*') + { + st->cp = c; + eatComment (st); + + c = st->cp; + if (c == NULL) + return; + + lastIsStar = FALSE; + c++; + } + /* OCaml has a rule which says : + * + * "Comments do not occur inside string or character literals. + * Nested comments are handled correctly."
+ * + * So if we encounter a string beginning, we must parse it to + * get a good comment nesting (bug ID: 3117537) + */ + else if (*c == '"') + { + st->cp = c; + eatString (st); + c = st->cp; + } + else + { + lastIsStar = '*' == *c; + c++; + } + } + + st->cp = c; +} + +static void readIdentifier (lexingState * st) +{ + const unsigned char *p; + vStringClear (st->name); + + /* first char is a simple letter */ + if (isAlpha (*st->cp) || *st->cp == '_') + vStringPut (st->name, (int) *st->cp); + + /* Go till you get identifier chars */ + for (p = st->cp + 1; isIdent (*p); p++) + vStringPut (st->name, (int) *p); + + st->cp = p; + + vStringTerminate (st->name); +} + +static ocamlKeyword eatNumber (lexingState * st) +{ + while (isNum (*st->cp)) + st->cp++; + return Tok_Val; +} + +/* Operator can be defined in OCaml as a function + * so we must be ample enough to parse them normally */ +static ocamlKeyword eatOperator (lexingState * st) +{ + int count = 0; + const unsigned char *root = st->cp; + + vStringClear (st->name); + + while (isOperator[st->cp[count]]) + { + vStringPut (st->name, st->cp[count]); + count++; + } + + vStringTerminate (st->name); + + st->cp += count; + if (count <= 1) + { + switch (root[0]) + { + case '|': + return Tok_Pipe; + case '=': + return Tok_EQ; + default: + return Tok_Op; + } + } + else if (count == 2 && root[0] == '-' && root[1] == '>') + return Tok_To; + else + return Tok_Op; +} + +/* The lexer is in charge of reading the file. + * Some of sub-lexer (like eatComment) also read file.
+ * lexing is finished when the lexer returns Tok_EOF */ +static ocamlKeyword lex (lexingState * st) +{ + int retType; + /* handling data input here */ + while (st->cp == NULL || st->cp[0] == '\0') + { + st->cp = fileReadLine (); + if (st->cp == NULL) + return Tok_EOF; + } + + if (isAlpha (*st->cp)) + { + readIdentifier (st); + retType = lookupKeyword (vStringValue (st->name), Lang_Ocaml); + + if (retType == -1) /* If it's not a keyword */ + { + return OcaIDENTIFIER; + } + else + { + return retType; + } + } + else if (isNum (*st->cp)) + return eatNumber (st); + else if (isSpace (*st->cp)) + { + eatWhiteSpace (st); + return lex (st); + } + /* OCaml permits the definition of our own operators + * so here we check all the consecutive chars which + * are operators to discard them. */ + else if (isOperator[*st->cp]) + return eatOperator (st); + else + switch (*st->cp) + { + case '(': + if (st->cp[1] == '*') /* ergl, a comment */ + { + eatComment (st); + return lex (st); + } + else + { + st->cp++; + return Tok_PARL; + } + + case ')': + st->cp++; + return Tok_PARR; + case '[': + st->cp++; + return Tok_BRL; + case ']': + st->cp++; + return Tok_BRR; + case '{': + st->cp++; + return Tok_CurlL; + case '}': + st->cp++; + return Tok_CurlR; + case '\'': + st->cp++; + return Tok_Prime; + case ',': + st->cp++; + return Tok_comma; + case '=': /* NOTE(review): initOperatorTable flags this character, so eatOperator presumably sees it first and this case is only a safety net; confirm */ + st->cp++; + return Tok_EQ; + case ';': + st->cp++; + return Tok_semi; + case '"': + eatString (st); + return Tok_Val; + case '_': + st->cp++; + return Tok_Val; + case '#': + st->cp++; + return Tok_Sharp; + case '\\': + st->cp++; + return Tok_Backslash; + + default: + st->cp++; + break; + } + + /* default return if nothing is recognized, + * shouldn't happen, but at least, it will + * be handled without destroying the parsing.
*/ + return Tok_Val; +} + +/*////////////////////////////////////////////////////////////////////// +//// Parsing */ +typedef void (*parseNext) (vString * const ident, ocaToken what); + +/********** Helpers */ +/* This variable holds the 'parser' which is going to + * handle the next token */ +static parseNext toDoNext; + +/* Special variable used by the token eaters to + * determine which parser to install once their + * job is finished. */ +static parseNext comeAfter; + +/* Token that puts an end to the current declaration/ + * statement */ +static ocaToken terminatingToken; + +/* Token to be searched by the different + * token eaters. */ +static ocaToken waitedToken; + +/* name of the last class, used for + * context stacking. */ +static vString *lastClass; + +static vString *voidName; +/* strong contexts are the ones popStrongContext unwinds to; soft ones are dropped on the way */ +typedef enum _sContextKind { + ContextStrong, + ContextSoft +} contextKind; +/* what a context stands for; contextDescription and contextTypeSuffix map it to text */ +typedef enum _sContextType { + ContextType, + ContextModule, + ContextClass, + ContextValue, + ContextFunction, + ContextMethod, + ContextBlock +} contextType; + +typedef struct _sOcamlContext { + contextKind kind; /* whether the context is strong or soft */ + contextType type; + parseNext callback; /* what to do when a context is pop'd */ + vString *contextName; /* name, if any, of the surrounding context */ +} ocamlContext; + +/* context stack, can be used to output scope information + * into the tag file.
*/ +static ocamlContext stack[OCAML_MAX_STACK_SIZE]; +/* current depth of the context stack, i.e. index of the next free slot */ +static int stackIndex; + +/* special function, often recalled, so putting it here */ +static void globalScope (vString * const ident, ocaToken what); + +/* Return : index of the last named context if one + * is found, -1 otherwise */ +static int getLastNamedIndex ( void ) +{ + int i; + + for (i = stackIndex - 1; i >= 0; --i) + { + if (vStringLength (stack[i].contextName) > 0) + { + return i; + } + } + + return -1; +} +/* text stored by prepareTag as the scope kind of a tag; NULL for a value outside contextType */ +static const char *contextDescription (contextType t) +{ + switch (t) + { + case ContextFunction: + return "function"; + case ContextMethod: + return "method"; + case ContextValue: + return "value"; + case ContextModule: + return "Module"; + case ContextType: + return "type"; + case ContextClass: + return "class"; + case ContextBlock: + return "begin/end"; + } + + return NULL; +} +/* separator that pushContext puts between the name of a parent of this type and the nested name */ +static char contextTypeSuffix (contextType t) +{ + switch (t) + { + case ContextFunction: + case ContextMethod: + case ContextValue: + case ContextModule: + return '/'; + case ContextType: + return '.'; + case ContextClass: + return '#'; + case ContextBlock: + return ' '; + } + + return '$'; +} + +/* Push a new context; a NULL name gives an unnamed entry, otherwise the + * name is prefixed with the last named context and its suffix. When the + * stack is full the push is dropped after a verbose message. */ +static void pushContext (contextKind kind, contextType type, parseNext after, + vString const *contextName) +{ + int parentIndex; + + if (stackIndex >= OCAML_MAX_STACK_SIZE) + { + verbose ("OCaml Maximum depth reached"); + return; + } + + + stack[stackIndex].kind = kind; + stack[stackIndex].type = type; + stack[stackIndex].callback = after; + + parentIndex = getLastNamedIndex (); + if (contextName == NULL) + { + vStringClear (stack[stackIndex++].contextName); + return; + } + + if (parentIndex >= 0) + { + vStringCopy (stack[stackIndex].contextName, + stack[parentIndex].contextName); + vStringPut (stack[stackIndex].contextName, + contextTypeSuffix (stack[parentIndex].type)); + + vStringCat (stack[stackIndex].contextName, contextName); + } + else +
vStringCopy (stack[stackIndex].contextName, contextName); + + stackIndex++; +} +/* strong context: popping it resumes in globalScope */ +static void pushStrongContext (vString * name, contextType type) +{ + pushContext (ContextStrong, type, &globalScope, name); +} +/* soft context: popping it resumes in the given continuation */ +static void pushSoftContext (parseNext continuation, + vString * name, contextType type) +{ + pushContext (ContextSoft, type, continuation, name); +} +/* unnamed soft context of type ContextValue */ +static void pushEmptyContext (parseNext continuation) +{ + pushContext (ContextSoft, ContextValue, continuation, NULL); +} + +/* unroll the stack until the last named context. + * then discard it. Used to handle the : + * let f x y = ... + * in ... + * where the context is reset after the in. Context may have + * been really nested before that. */ +static void popLastNamed ( void ) +{ + int i = getLastNamedIndex (); + + if (i >= 0) + { + stackIndex = i; + toDoNext = stack[i].callback; + vStringClear (stack[i].contextName); + } + else + { + /* ok, no named context found... + * (should not happen). */ + stackIndex = 0; + toDoNext = &globalScope; + } +} + +/* pop a context without regarding its content + * (beside handling empty stack case) */ +static void popSoftContext ( void ) +{ + if (stackIndex <= 0) + { + toDoNext = &globalScope; + } + else + { + stackIndex--; + toDoNext = stack[stackIndex].callback; + vStringClear (stack[stackIndex].contextName); + } +} + +/* Reset everything until the last global space. + * a strong context can be : + * - module + * - class definition + * - the initial global space + * - a _global_ declaration (let at global scope or in a module). + * Created to exit quickly deeply nested context */ +static contextType popStrongContext ( void ) +{ + int i; + + for (i = stackIndex - 1; i >= 0; --i) + { + if (stack[i].kind == ContextStrong) + { + stackIndex = i; + toDoNext = stack[i].callback; + vStringClear (stack[i].contextName); + return stack[i].type; + } + } + /* ok, no strong context found...
*/ + stackIndex = 0; + toDoNext = &globalScope; + return -1; /* not a contextType value: killCurrentState lets it fall into its default case */ +} + +/* Ignore everything till waitedToken and jump to comeAfter. + * If the "end" keyword is encountered break, doesn't remember + * why though. */ +static void tillToken (vString * const UNUSED (ident), ocaToken what) +{ + if (what == waitedToken) + toDoNext = comeAfter; + else if (what == OcaKEYWORD_end) + { + popStrongContext (); + toDoNext = &globalScope; + } +} + +/* Ignore everything till a waitedToken is seen, but + * take care of balanced parentheses/bracket use; the counters go down on an opener and up on a closer, only the test against zero matters */ +static void contextualTillToken (vString * const UNUSED (ident), ocaToken what) +{ + static int parentheses = 0; + static int bracket = 0; + static int curly = 0; + + switch (what) + { + case Tok_PARL: + parentheses--; + break; + case Tok_PARR: + parentheses++; + break; + case Tok_CurlL: + curly--; + break; + case Tok_CurlR: + curly++; + break; + case Tok_BRL: + bracket--; + break; + case Tok_BRR: + bracket++; + break; + + default: /* other token are ignored */ + break; + } + + if (what == waitedToken && parentheses == 0 && bracket == 0 && curly == 0) + toDoNext = comeAfter; + + else if (what == OcaKEYWORD_end) + { + popStrongContext (); + toDoNext = &globalScope; + } +} + +/* Wait for waitedToken and jump to comeAfter or let + * the globalScope handle declarations */ +static void tillTokenOrFallback (vString * const ident, ocaToken what) +{ + if (what == waitedToken) + toDoNext = comeAfter; + else + globalScope (ident, what); +} + +/* ignore token till waitedToken, or give up if find + * terminatingToken. Use globalScope to handle new + * declarations.
*/ +static void tillTokenOrTerminatingOrFallback (vString * const ident, + ocaToken what) +{ + if (what == waitedToken) + toDoNext = comeAfter; + else if (what == terminatingToken) + toDoNext = globalScope; + else + globalScope (ident, what); +} + +/* ignore the next token in the stream and jump to the + * given comeAfter state */ +static void ignoreToken (vString * const UNUSED (ident), ocaToken UNUSED (what)) +{ + toDoNext = comeAfter; +} + +/********** Grammar */ +/* the purpose of each function is detailed near their + * implementation */ +/* Pop the innermost strong context; when it was a value, function, method or type, pop one more strong context. */ +static void killCurrentState ( void ) +{ + + /* Tracking the kind of previous strong + * context, if it doesn't match with a + * really strong entity, repop */ + switch (popStrongContext ()) + { + + case ContextValue: + popStrongContext (); + break; + case ContextFunction: + popStrongContext (); + break; + case ContextMethod: + popStrongContext (); + break; + + case ContextType: + popStrongContext(); + break; + case ContextBlock: + break; + case ContextModule: + break; + case ContextClass: + break; + default: + /* nothing more */ + break; + } +} + +/* used to prepare tag for OCaml, just in case there is a need to + * add additional information to the tag.
*/ +static void prepareTag (tagEntryInfo * tag, vString const *name, ocamlKind kind) +{ + int parentIndex; + + initTagEntry (tag, vStringValue (name)); + tag->kindName = OcamlKinds[kind].name; + tag->kind = OcamlKinds[kind].letter; + + if (kind == K_MODULE) + { + tag->lineNumberEntry = TRUE; + tag->lineNumber = 1; /* module tags are always pinned to line 1 */ + } + parentIndex = getLastNamedIndex (); /* the last named context, if any, becomes the scope of the tag */ + if (parentIndex >= 0) + { + tag->extensionFields.scope[0] = + contextDescription (stack[parentIndex].type); + tag->extensionFields.scope[1] = + vStringValue (stack[parentIndex].contextName); + } +} + +/* Used to centralise tag creation, and be able to add + * more information to it in the future; nothing is emitted when the kind is disabled or the name is NULL or empty */ +static void addTag (vString * const ident, int kind) +{ + if (OcamlKinds [kind].enabled && ident != NULL && vStringLength (ident) > 0) + { + tagEntryInfo toCreate; + prepareTag (&toCreate, ident, kind); + makeTagEntry (&toCreate); + } +} +/* raised by requestStrongPoping; cleanupPreviousParser then clears it and pops one strong context */ +boolean needStrongPoping = FALSE; +static void requestStrongPoping ( void ) +{ + needStrongPoping = TRUE; +} + +static void cleanupPreviousParser ( void ) +{ + if (needStrongPoping) + { + needStrongPoping = FALSE; + popStrongContext (); + } +} + +/* Due to some circular dependencies, the following functions + * must be forward-declared.
*/ +static void letParam (vString * const ident, ocaToken what); +static void localScope (vString * const ident, ocaToken what); +static void mayRedeclare (vString * const ident, ocaToken what); +static void typeSpecification (vString * const ident, ocaToken what); + +/* + * Parse a record type + * type ident = // parsed previously + * { + * ident1: type1; + * ident2: type2; + * } + * Each field name is tagged as K_RECORDFIELD, then the rest of the + * field is skipped up to the next semicolon. + */ +static void typeRecord (vString * const ident, ocaToken what) +{ + switch (what) + { + case OcaIDENTIFIER: + addTag (ident, K_RECORDFIELD); + terminatingToken = Tok_CurlR; + waitedToken = Tok_semi; + comeAfter = &typeRecord; + toDoNext = &tillTokenOrTerminatingOrFallback; + break; + + case OcaKEYWORD_mutable: + /* ignore it */ + break; + + case Tok_CurlR: + popStrongContext (); + toDoNext = &globalScope; + break; + + default: /* don't care */ + break; + } +} + +/* handle : + * exception ExceptionName of ... */ +static void exceptionDecl (vString * const ident, ocaToken what) +{ + if (what == OcaIDENTIFIER) + { + addTag (ident, K_EXCEPTION); + } + else /* probably ill-formed, give back to global scope */ + { + globalScope (ident, what); + } + toDoNext = &globalScope; +} +/* candidate constructor tag and its name: filled in by typeSpecification, emitted or dropped by constructorValidation */ +tagEntryInfo tempTag; +vString *tempIdent; + +/* Ensure a constructor is not a type path beginning + * with a module */ +static void constructorValidation (vString * const ident, ocaToken what) +{ + switch (what) + { + case Tok_Op: /* if we got a '.'
which is an operator */ + toDoNext = &globalScope; + popStrongContext (); + needStrongPoping = FALSE; + break; + + case OcaKEYWORD_of: /* OK, it must be a constructor :) */ + makeTagEntry (&tempTag); + vStringClear (tempIdent); + toDoNext = &tillTokenOrFallback; + comeAfter = &typeSpecification; + waitedToken = Tok_Pipe; + break; + + case Tok_Pipe: /* OK, it was a constructor :) */ + makeTagEntry (&tempTag); + vStringClear (tempIdent); + toDoNext = &typeSpecification; + break; + + default: /* anything else means that we're not facing a module name */ + makeTagEntry (&tempTag); + vStringClear (tempIdent); + toDoNext = &tillTokenOrFallback; + comeAfter = &typeSpecification; + waitedToken = Tok_Pipe; + + /* nothing in the context, discard it */ + popStrongContext (); + + /* to be sure we use this token */ + globalScope (ident, what); + } +} + + +/* Parse beginning of type definition + * type 'avar ident = + * or + * type ('var1, 'var2) ident = + */ +static void typeDecl (vString * const ident, ocaToken what) +{ + switch (what) + { + /* parameterized */ + case Tok_Prime: + comeAfter = &typeDecl; + toDoNext = &ignoreToken; + break; + /* LOTS of parameters */ + case Tok_PARL: + comeAfter = &typeDecl; + waitedToken = Tok_PARR; + toDoNext = &tillToken; + break; + + case OcaIDENTIFIER: + addTag (ident, K_TYPE); + pushStrongContext (ident, ContextType); + requestStrongPoping (); + waitedToken = Tok_EQ; + comeAfter = &typeSpecification; + toDoNext = &tillTokenOrFallback; + break; + + default: + globalScope (ident, what); + } +} + +/* Parse type of kind + * type bidule = Ctor1 of ... + * | Ctor2 + * | Ctor3 of ... + * or + * type bidule = | Ctor1 of ... | Ctor2 + * + * when type bidule = { ... } is detected, + * let typeRecord handle it.
*/ +static void typeSpecification (vString * const ident, ocaToken what) +{ + switch (what) + { + case OcaIDENTIFIER: + if (isUpperAlpha (ident->buffer[0])) + { + /* here we handle type aliases of type + * type foo = AnotherModule.bar + * AnotherModule can mistakenly be taken + * for a constructor. */ + vStringCopy (tempIdent, ident); + prepareTag (&tempTag, tempIdent, K_CONSTRUCTOR); + toDoNext = &constructorValidation; + } + else + { + toDoNext = &tillTokenOrFallback; + comeAfter = &typeSpecification; + waitedToken = Tok_Pipe; + } + break; + + case OcaKEYWORD_and: + toDoNext = &typeDecl; + break; + + case Tok_BRL: /* the '[' & ']' are ignored to accommodate */ + case Tok_BRR: /* with the revised syntax */ + case Tok_Pipe: + /* just ignore it */ + break; + + case Tok_CurlL: + toDoNext = &typeRecord; + break; + + default: /* don't care */ + break; + } +} + +/* set to TRUE by parseLabel and parseOptionnal once the identifier of the current special parameter has been handled */ +static boolean dirtySpecialParam = FALSE; + + +/* parse the ~label and ~label:type parameter */ +static void parseLabel (vString * const ident, ocaToken what) +{ + static int parCount = 0; + + switch (what) + { + case OcaIDENTIFIER: + if (!dirtySpecialParam) + { + + if (exportLocalInfo) + addTag (ident, K_VAR); + + dirtySpecialParam = TRUE; + } + break; + + case Tok_PARL: + parCount++; + break; + + case Tok_PARR: + parCount--; + if (parCount == 0) + toDoNext = &letParam; + break; + + case Tok_Op: + if (ident->buffer[0] == ':') + { + toDoNext = &ignoreToken; + comeAfter = &letParam; + } + else if (parCount == 0 && dirtySpecialParam) + { + toDoNext = &letParam; + letParam (ident, what); + } + break; + + default: + if (parCount == 0 && dirtySpecialParam) + { + toDoNext = &letParam; + letParam (ident, what); + } + break; + } +} + + +/* Optional argument with syntax like this : + * ?(foo = value) */ +static void parseOptionnal (vString * const ident, ocaToken what) +{ + static int parCount = 0; + + + switch (what) + { + case OcaIDENTIFIER: + if (!dirtySpecialParam) + { + if (exportLocalInfo) + addTag (ident, K_VAR);
+ + dirtySpecialParam = TRUE; + + if (parCount == 0) + toDoNext = &letParam; + } + break; + + case Tok_PARL: + parCount++; + break; + + case Tok_PARR: + parCount--; + if (parCount == 0) + toDoNext = &letParam; + break; + + default: /* don't care */ + break; + } +} + + +/** handle a let nested inside a function: + * a local let, as the name says */ +static void localLet (vString * const ident, ocaToken what) +{ + switch (what) + { + case Tok_PARL: + /* We ignore this token to be able to parse such + * declarations : + * let (ident : type) = ... + */ + break; + + case OcaKEYWORD_rec: + /* just ignore to be able to parse such declarations: + * let rec ident = ... */ + break; + + case Tok_Op: + /* we are defining a new operator, it's a + * function definition */ + if (exportLocalInfo) + addTag (ident, K_FUNCTION); + + pushSoftContext (mayRedeclare, ident, ContextFunction); + toDoNext = &letParam; + break; + + /* Can be a weird binding, or an '_' */ + case Tok_Val: + if (exportLocalInfo) + addTag (ident, K_VAR); + pushSoftContext (mayRedeclare, ident, ContextValue); + toDoNext = &letParam; + break; + + case OcaIDENTIFIER: + if (exportLocalInfo) + addTag (ident, K_VAR); + pushSoftContext (mayRedeclare, ident, ContextValue); + toDoNext = &letParam; + break; + + case OcaKEYWORD_end: + popStrongContext (); + break; + + default: + toDoNext = &localScope; + break; + } +} + +/* parse : + * | pattern pattern -> ... + * or + * pattern pattern pattern -> ... + * we ignore all identifiers declared in the pattern, + * because their scope is likely to be even more limited + * than the let definitions. + * Used after a match ... with, or a function ... or fun ... + * because their syntax is similar.
*/ +static void matchPattern (vString * const ident, ocaToken what) +{ + /* keep track of [], as it + * can be used in patterns and can + * mean the end of match expression in + * revised syntax */ + static int braceCount = 0; + + switch (what) + { + case Tok_To: + pushEmptyContext (&matchPattern); + toDoNext = &mayRedeclare; + break; + + case Tok_BRL: + braceCount++; + break; + + case OcaKEYWORD_value: + popLastNamed (); + globalScope (ident, what); + break; + + case OcaKEYWORD_in: + popLastNamed (); + break; + + default: + break; + } +} + +/* Used at the beginning of a new scope (begin of a + * definition, parenthesis...) to catch inner let + * definition that may be in. */ +static void mayRedeclare (vString * const ident, ocaToken what) +{ + switch (what) + { + case OcaKEYWORD_value: + // let globalScope handle it + globalScope (ident, what); + break; + + case OcaKEYWORD_let: + case OcaKEYWORD_val: + toDoNext = localLet; + break; + + case OcaKEYWORD_object: + vStringClear (lastClass); + pushContext (ContextStrong, ContextClass, + &localScope, NULL /*voidName */ ); + needStrongPoping = FALSE; + toDoNext = &globalScope; + break; + + case OcaKEYWORD_for: + case OcaKEYWORD_while: + toDoNext = &tillToken; + waitedToken = OcaKEYWORD_do; + comeAfter = &mayRedeclare; + break; + + case OcaKEYWORD_try: + toDoNext = &mayRedeclare; + pushSoftContext (matchPattern, ident, ContextFunction); + break; + + case OcaKEYWORD_fun: + toDoNext = &matchPattern; + break; + + /* Handle the special ;; from the OCaml + * Top level */ + case Tok_semi: + default: + toDoNext = &localScope; + localScope (ident, what); + } +} + +/* parse : + * p1 p2 ... pn = ... + * or + * ?(p1=v) p2 ~p3 ~pn:ja ... = ... 
*/ +static void letParam (vString * const ident, ocaToken what) +{ + switch (what) + { + case Tok_EQ: + toDoNext = &mayRedeclare; + break; + + case OcaIDENTIFIER: + if (exportLocalInfo) + addTag (ident, K_VAR); + break; + + case Tok_Op: + switch (ident->buffer[0]) + { + case ':': + /*popSoftContext(); */ + /* we got a type signature */ + comeAfter = &mayRedeclare; + toDoNext = &tillTokenOrFallback; + waitedToken = Tok_EQ; + break; + + /* parse something like + * ~varname:type + * or + * ~varname + * or + * ~(varname: long type) */ + case '~': + toDoNext = &parseLabel; + dirtySpecialParam = FALSE; + break; + + /* Optional argument with syntax like this : + * ?(bla = value) + * or + * ?bla */ + case '?': + toDoNext = &parseOptionnal; + dirtySpecialParam = FALSE; + break; + + default: + break; + } + break; + + default: /* don't care */ + break; + } +} + + +/* parse object ... + * used to be sure the class definition is not a type + * alias */ +static void classSpecif (vString * const UNUSED (ident), ocaToken what) +{ + switch (what) + { + case OcaKEYWORD_object: + pushStrongContext (lastClass, ContextClass); + toDoNext = &globalScope; + break; + + default: + vStringClear (lastClass); + toDoNext = &globalScope; + } +} + +/* Handle a method ... class declaration. + * nearly a copy/paste of globalLet. */ +static void methodDecl (vString * const ident, ocaToken what) +{ + + switch (what) + { + case Tok_PARL: + /* We ignore this token to be able to parse such + * declarations : + * let (ident : type) = ... */ + break; + + case OcaKEYWORD_mutable: + case OcaKEYWORD_virtual: + case OcaKEYWORD_rec: + /* just ignore to be able to parse such declarations: + * let rec ident = ... 
*/ + break; + + case OcaIDENTIFIER: + addTag (ident, K_METHOD); + /* Normal pushing to get good subs */ + pushStrongContext (ident, ContextMethod); + /*pushSoftContext( globalScope, ident, ContextMethod ); */ + toDoNext = &letParam; + break; + + case OcaKEYWORD_end: + popStrongContext (); + break; + + default: + toDoNext = &globalScope; + break; + } +} + +/* name of the last module, used for + * context stacking. */ +vString *lastModule; + + +/* parse + * ... struct (* new global scope *) end + * or + * ... sig (* new global scope *) end + * or + * functor ... -> moduleSpecif + */ +static void moduleSpecif (vString * const ident, ocaToken what) +{ + + switch (what) + { + case OcaKEYWORD_functor: + toDoNext = &contextualTillToken; + waitedToken = Tok_To; + comeAfter = &moduleSpecif; + break; + + case OcaKEYWORD_struct: + case OcaKEYWORD_sig: + pushStrongContext (lastModule, ContextModule); + toDoNext = &globalScope; + break; + + case Tok_PARL: /* ( */ + toDoNext = &contextualTillToken; + comeAfter = &globalScope; + waitedToken = Tok_PARR; + contextualTillToken (ident, what); + break; + + default: + vStringClear (lastModule); + toDoNext = &globalScope; + } +} + +/* parse : + * module name = ... + * then pass the token stream to moduleSpecif */ +static void moduleDecl (vString * const ident, ocaToken what) +{ + switch (what) + { + case OcaKEYWORD_type: + /* just ignore it, name come after */ + break; + + case OcaIDENTIFIER: + addTag (ident, K_MODULE); + vStringCopy (lastModule, ident); + waitedToken = Tok_EQ; + comeAfter = &moduleSpecif; + toDoNext = &contextualTillToken; + break; + + default: /* don't care */ + break; + } +} + +/* parse : + * class name = ... + * or + * class virtual ['a,'b] classname = ... 
*/ +static void classDecl (vString * const ident, ocaToken what) +{ + switch (what) + { + case OcaIDENTIFIER: + addTag (ident, K_CLASS); + vStringCopy (lastClass, ident); + toDoNext = &contextualTillToken; + waitedToken = Tok_EQ; + comeAfter = &classSpecif; + break; + + case Tok_BRL: + toDoNext = &tillToken; + waitedToken = Tok_BRR; + comeAfter = &classDecl; + break; + + default: + break; + } +} + +/* Handle a global + * let ident ... + * or + * let rec ident ... */ +static void globalLet (vString * const ident, ocaToken what) +{ + switch (what) + { + case Tok_PARL: + /* We ignore this token to be able to parse such + * declarations : + * let (ident : type) = ... + */ + break; + + case OcaKEYWORD_mutable: + case OcaKEYWORD_virtual: + case OcaKEYWORD_rec: + /* just ignore to be able to parse such declarations: + * let rec ident = ... */ + break; + + case Tok_Op: + /* we are defining a new operator, it's a + * function definition */ + addTag (ident, K_FUNCTION); + pushStrongContext (ident, ContextFunction); + toDoNext = &letParam; + break; + + case OcaIDENTIFIER: + addTag (ident, K_VAR); + pushStrongContext (ident, ContextValue); + requestStrongPoping (); + toDoNext = &letParam; + break; + + case OcaKEYWORD_end: + popStrongContext (); + break; + + default: + toDoNext = &globalScope; + break; + } +} + +/* Handle the "strong" top levels, all 'big' declarations + * happen here */ +static void globalScope (vString * const UNUSED (ident), ocaToken what) +{ + /* Do not touch, this is used only by the global scope + * to handle an 'and' */ + static parseNext previousParser = &globalScope; + + switch (what) + { + case OcaKEYWORD_and: + cleanupPreviousParser (); + toDoNext = previousParser; + break; + + case OcaKEYWORD_type: + cleanupPreviousParser (); + toDoNext = &typeDecl; + previousParser = &typeDecl; + break; + + case OcaKEYWORD_class: + cleanupPreviousParser (); + toDoNext = &classDecl; + previousParser = &classDecl; + break; + + case OcaKEYWORD_module: + 
cleanupPreviousParser (); + toDoNext = &moduleDecl; + previousParser = &moduleDecl; + break; + + case OcaKEYWORD_end: + needStrongPoping = FALSE; + killCurrentState (); + /*popStrongContext(); */ + break; + + case OcaKEYWORD_method: + cleanupPreviousParser (); + toDoNext = &methodDecl; + /* and is not allowed in methods */ + break; + + /* val is mixed with let as global + * to be able to handle mli & new syntax */ + case OcaKEYWORD_val: + case OcaKEYWORD_value: + case OcaKEYWORD_let: + cleanupPreviousParser (); + toDoNext = &globalLet; + previousParser = &globalLet; + break; + + case OcaKEYWORD_exception: + cleanupPreviousParser (); + toDoNext = &exceptionDecl; + previousParser = &globalScope; + break; + + /* must be a #line directive, discard the + * whole line. */ + case Tok_Sharp: + /* ignore */ + break; + + default: + /* we don't care */ + break; + } +} + +/* Parse expression. Well ignore it is more the case, + * ignore all tokens except "shocking" keywords */ +static void localScope (vString * const ident, ocaToken what) +{ + switch (what) + { + case Tok_Pipe: + case Tok_PARR: + case Tok_BRR: + case Tok_CurlR: + popSoftContext (); + break; + + /* Everything that `begin` has an `end` + * as end is overloaded and signal many end + * of things, we add an empty strong context to + * avoid problem with the end. + */ + case OcaKEYWORD_begin: + pushContext (ContextStrong, ContextBlock, &mayRedeclare, NULL); + toDoNext = &mayRedeclare; + break; + + case OcaKEYWORD_in: + popLastNamed (); + break; + + /* Ok, we got a '{', which is much likely to create + * a record. We cannot treat it like other [ && (, + * because it may contain the 'with' keyword and screw + * everything else. 
*/ + case Tok_CurlL: + toDoNext = &contextualTillToken; + waitedToken = Tok_CurlR; + comeAfter = &localScope; + contextualTillToken (ident, what); + break; + + /* Yeah imperative feature of OCaml, + * a ';' like in C */ + case Tok_semi: + toDoNext = &mayRedeclare; + break; + + case Tok_PARL: + case Tok_BRL: + pushEmptyContext (&localScope); + toDoNext = &mayRedeclare; + break; + + case OcaKEYWORD_and: + popLastNamed (); + toDoNext = &localLet; + break; + + case OcaKEYWORD_else: + case OcaKEYWORD_then: + popSoftContext (); + pushEmptyContext (&localScope); + toDoNext = &mayRedeclare; + break; + + case OcaKEYWORD_if: + pushEmptyContext (&localScope); + toDoNext = &mayRedeclare; + break; + + case OcaKEYWORD_match: + pushEmptyContext (&localScope); + toDoNext = &mayRedeclare; + break; + + case OcaKEYWORD_with: + popSoftContext (); + toDoNext = &matchPattern; + pushEmptyContext (&matchPattern); + break; + + case OcaKEYWORD_end: + killCurrentState (); + break; + + + case OcaKEYWORD_fun: + comeAfter = &mayRedeclare; + toDoNext = &tillToken; + waitedToken = Tok_To; + break; + + case OcaKEYWORD_done: + case OcaKEYWORD_val: + /* doesn't care */ + break; + + default: + requestStrongPoping (); + globalScope (ident, what); + break; + } +} + +/*//////////////////////////////////////////////////////////////// +//// Deal with the system */ +/* in OCaml the file name is the module name used in the language + * with it first letter put in upper case */ +static void computeModuleName ( void ) +{ + /* in Ocaml the file name define a module. + * so we define a module =) + */ + const char *filename = getSourceFileName (); + int beginIndex = 0; + int endIndex = strlen (filename) - 1; + vString *moduleName = vStringNew (); + + while (filename[endIndex] != '.' 
&& endIndex > 0) + endIndex--; + + /* avoid problem with path in front of filename */ + beginIndex = endIndex; + while (beginIndex > 0) + { + if (filename[beginIndex] == '\\' || filename[beginIndex] == '/') + { + beginIndex++; + break; + } + + beginIndex--; + } + + vStringNCopyS (moduleName, &filename[beginIndex], endIndex - beginIndex); + vStringTerminate (moduleName); + + if (isLowerAlpha (moduleName->buffer[0])) + moduleName->buffer[0] += ('A' - 'a'); + + addTag (moduleName, K_MODULE); + vStringDelete (moduleName); +} + +/* Allocate all string of the context stack */ +static void initStack ( void ) +{ + int i; + for (i = 0; i < OCAML_MAX_STACK_SIZE; ++i) + stack[i].contextName = vStringNew (); + stackIndex = 0; +} + +static void clearStack ( void ) +{ + int i; + for (i = 0; i < OCAML_MAX_STACK_SIZE; ++i) + vStringDelete (stack[i].contextName); +} + +static void findOcamlTags (void) +{ + vString *name = vStringNew (); + lexingState st; + ocaToken tok; + + initStack (); + computeModuleName (); + tempIdent = vStringNew (); + lastModule = vStringNew (); + lastClass = vStringNew (); + voidName = vStringNew (); + vStringCopyS (voidName, "_"); + + st.name = vStringNew (); + st.cp = fileReadLine (); + toDoNext = &globalScope; + tok = lex (&st); + while (tok != Tok_EOF) + { + (*toDoNext) (st.name, tok); + tok = lex (&st); + } + + vStringDelete (name); + vStringDelete (voidName); + vStringDelete (tempIdent); + vStringDelete (lastModule); + vStringDelete (lastClass); + clearStack (); +} + +static void ocamlInitialize (const langType language) +{ + Lang_Ocaml = language; + + initOperatorTable (); + initKeywordHash (); +} + +extern parserDefinition *OcamlParser (void) +{ + static const char *const extensions[] = { "ml", "mli", NULL }; + parserDefinition *def = parserNew ("OCaml"); + def->kinds = OcamlKinds; + def->kindCount = KIND_COUNT (OcamlKinds); + def->extensions = extensions; + def->parser = findOcamlTags; + def->initialize = ocamlInitialize; + + return def; +} diff 
--git a/third_party/ctags/options.c b/third_party/ctags/options.c new file mode 100644 index 000000000..16265a475 --- /dev/null +++ b/third_party/ctags/options.c @@ -0,0 +1,1847 @@ +// clang-format off +/* +* $Id: options.c 576 2007-06-30 04:16:23Z elliotth $ +* +* Copyright (c) 1996-2003, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains functions to process command line options. +*/ + +/* +* INCLUDE FILES +*/ +#include "third_party/ctags/general.h" /* must always come first */ + +#include "libc/calls/calls.h" +#include "libc/calls/dprintf.h" +#include "libc/calls/termios.h" +#include "libc/fmt/conv.h" +#include "libc/limits.h" +#include "libc/mem/alg.h" +#include "libc/mem/mem.h" +#include "libc/runtime/runtime.h" +#include "libc/stdio/rand.h" +#include "libc/stdio/temp.h" +#include "libc/str/str.h" +#include "libc/sysv/consts/exit.h" +#include "third_party/gdtoa/gdtoa.h" +#include "third_party/getopt/getopt.h" +#include "third_party/musl/crypt.h" +#include "third_party/musl/rand48.h" +#include "libc/mem/alg.h" +#include "libc/str/str.h" +#include "libc/calls/calls.h" +#include "libc/calls/dprintf.h" +#include "libc/calls/weirdtypes.h" +#include "libc/fmt/fmt.h" +#include "libc/mem/fmt.h" +#include "libc/stdio/stdio.h" +#include "libc/stdio/temp.h" +#include "third_party/musl/tempnam.h" +#include "libc/str/str.h" /* to declare isspace () */ + +#include "third_party/ctags/ctags.h" +#include "third_party/ctags/debug.h" +#include "third_party/ctags/main.h" +#define OPTION_WRITE +#include "third_party/ctags/options.h" +#include "third_party/ctags/parse.h" +#include "third_party/ctags/routines.h" + +/* +* MACROS +*/ +#define INVOCATION "Usage: %s [options] [file(s)]\n" + +#define CTAGS_ENVIRONMENT "CTAGS" +#define ETAGS_ENVIRONMENT "ETAGS" + +#define CTAGS_FILE "tags" +#define ETAGS_FILE "TAGS" + +#ifndef ETAGS +# define ETAGS "etags" /* name which causes default use 
of to -e */ +#endif + +/* The following separators are permitted for list options. + */ +#define EXTENSION_SEPARATOR '.' +#define PATTERN_START '(' +#define PATTERN_STOP ')' +#define IGNORE_SEPARATORS ", \t\n" + +#ifndef DEFAULT_FILE_FORMAT +# define DEFAULT_FILE_FORMAT 2 +#endif + +#if defined (HAVE_OPENDIR) || defined (HAVE_FINDFIRST) || defined (HAVE__FINDFIRST) || defined (AMIGA) +# define RECURSE_SUPPORTED +#endif + +#define isCompoundOption(c) (boolean) (strchr ("fohiILpDb", (c)) != NULL) + +/* +* Data declarations +*/ + +enum eOptionLimits { + MaxHeaderExtensions = 100, /* maximum number of extensions in -h option */ + MaxSupportedTagFormat = 2 +}; + +typedef struct sOptionDescription { + int usedByEtags; + const char *description; +} optionDescription; + +typedef void (*parametricOptionHandler) (const char *const option, const char *const parameter); + +typedef const struct { + const char* name; /* name of option as specified by user */ + parametricOptionHandler handler; /* routine to handle option */ + boolean initOnly; /* option must be specified before any files */ +} parametricOption; + +typedef const struct { + const char* name; /* name of option as specified by user */ + boolean* pValue; /* pointer to option value */ + boolean initOnly; /* option must be specified before any files */ +} booleanOption; + +/* +* DATA DEFINITIONS +*/ + +static boolean NonOptionEncountered; +static stringList *OptionFiles; +static stringList* Excluded; +static boolean FilesRequired = TRUE; +static boolean SkipConfiguration; + +static const char *const HeaderExtensions [] = { + "h", "H", "hh", "hpp", "hxx", "h++", "inc", "def", NULL +}; + +optionValues Option = { + { + FALSE, /* --extra=f */ + FALSE, /* --extra=q */ + TRUE, /* --file-scope */ + }, + { + FALSE, /* -fields=a */ + TRUE, /* -fields=f */ + FALSE, /* -fields=m */ + FALSE, /* -fields=i */ + TRUE, /* -fields=k */ + FALSE, /* -fields=z */ + FALSE, /* -fields=K */ + FALSE, /* -fields=l */ + FALSE, /* -fields=n */ + 
TRUE, /* -fields=s */ + FALSE, /* -fields=S */ + TRUE /* -fields=t */ + }, + NULL, /* -I */ + FALSE, /* -a */ + FALSE, /* -B */ + FALSE, /* -e */ +#ifdef MACROS_USE_PATTERNS + EX_PATTERN, /* -n, --excmd */ +#else + EX_MIX, /* -n, --excmd */ +#endif + FALSE, /* -R */ + SO_SORTED, /* -u, --sort */ + FALSE, /* -V */ + FALSE, /* -x */ + NULL, /* -L */ + NULL, /* -o */ + NULL, /* -h */ + NULL, /* --etags-include */ + DEFAULT_FILE_FORMAT,/* --format */ + FALSE, /* --if0 */ + FALSE, /* --kind-long */ + LANG_AUTO, /* --lang */ + TRUE, /* --links */ + FALSE, /* --filter */ + NULL, /* --filter-terminator */ + FALSE, /* --tag-relative */ + FALSE, /* --totals */ + FALSE, /* --line-directives */ +#ifdef DEBUG + 0, 0 /* -D, -b */ +#endif +}; + +/* +- Locally used only +*/ + +static optionDescription LongOptionDescription [] = { + {1," -a Append the tags to an existing tag file."}, +#ifdef DEBUG + {1," -b "}, + {1," Set break line."}, +#endif + {0," -B Use backward searching patterns (?...?)."}, +#ifdef DEBUG + {1," -D "}, + {1," Set debug level."}, +#endif + {0," -e Output tag file for use with Emacs."}, + {1," -f "}, + {1," Write tags to specified file. 
Value of \"-\" writes tags to stdout"}, + {1," [\"tags\"; or \"TAGS\" when -e supplied]."}, + {0," -F Use forward searching patterns (/.../) (default)."}, + {1," -h "}, + {1," Specify list of file extensions to be treated as include files."}, + {1," [\".h.H.hh.hpp.hxx.h++\"]."}, + {1," -I "}, + {1," A list of tokens to be specially handled is read from either the"}, + {1," command line or the specified file."}, + {1," -L "}, + {1," A list of source file names are read from the specified file."}, + {1," If specified as \"-\", then standard input is read."}, + {0," -n Equivalent to --excmd=number."}, + {0," -N Equivalent to --excmd=pattern."}, + {1," -o Alternative for -f."}, +#ifdef RECURSE_SUPPORTED + {1," -R Equivalent to --recurse."}, +#else + {1," -R Not supported on this platform."}, +#endif + {0," -u Equivalent to --sort=no."}, + {1," -V Equivalent to --verbose."}, + {1," -x Print a tabular cross reference file to standard output."}, + {1," --append=[yes|no]"}, + {1," Should tags should be appended to existing tag file [no]?"}, + {1," --etags-include=file"}, + {1," Include reference to 'file' in Emacs-style tag file (requires -e)."}, + {1," --exclude=pattern"}, + {1," Exclude files and directories matching 'pattern'."}, + {0," --excmd=number|pattern|mix"}, +#ifdef MACROS_USE_PATTERNS + {0," Uses the specified type of EX command to locate tags [pattern]."}, +#else + {0," Uses the specified type of EX command to locate tags [mix]."}, +#endif + {1," --extra=[+|-]flags"}, + {1," Include extra tag entries for selected information (flags: \"fq\")."}, + {1," --fields=[+|-]flags"}, + {1," Include selected extension fields (flags: \"afmikKlnsStz\") [fks]."}, + {1," --file-scope=[yes|no]"}, + {1," Should tags scoped only for a single file (e.g. 
\"static\" tags"}, + {1," be included in the output [yes]?"}, + {1," --filter=[yes|no]"}, + {1," Behave as a filter, reading file names from standard input and"}, + {1," writing tags to standard output [no]."}, + {1," --filter-terminator=string"}, + {1," Specify string to print to stdout following the tags for each file"}, + {1," parsed when --filter is enabled."}, + {0," --format=level"}, +#if DEFAULT_FILE_FORMAT == 1 + {0," Force output of specified tag file format [1]."}, +#else + {0," Force output of specified tag file format [2]."}, +#endif + {1," --help"}, + {1," Print this option summary."}, + {1," --if0=[yes|no]"}, + {1," Should C code within #if 0 conditional branches be parsed [no]?"}, + {1," ---kinds=[+|-]kinds"}, + {1," Enable/disable tag kinds for language ."}, + {1," --langdef=name"}, + {1," Define a new language to be parsed with regular expressions."}, + {1," --langmap=map(s)"}, + {1," Override default mapping of language to source file extension."}, + {1," --language-force=language"}, + {1," Force all files to be interpreted using specified language."}, + {1," --languages=[+|-]list"}, + {1," Restrict files scanned for tags to those mapped to langauges"}, + {1," specified in the comma-separated 'list'. 
The list can contain any"}, + {1," built-in or user-defined language [all]."}, + {1," --license"}, + {1," Print details of software license."}, + {0," --line-directives=[yes|no]"}, + {0," Should #line directives be processed [no]?"}, + {1," --links=[yes|no]"}, + {1," Indicate whether symbolic links should be followed [yes]."}, + {1," --list-kinds=[language|all]"}, + {1," Output a list of all tag kinds for specified language or all."}, + {1," --list-languages"}, + {1," Output list of supported languages."}, + {1," --list-maps=[language|all]"}, + {1," Output list of language mappings."}, + {1," --options=file"}, + {1," Specify file from which command line options should be read."}, + {1," --recurse=[yes|no]"}, +#ifdef RECURSE_SUPPORTED + {1," Recurse into directories supplied on command line [no]."}, +#else + {1," Not supported on this platform."}, +#endif +#ifdef HAVE_REGEX + {1," --regex-=/line_pattern/name_pattern/[flags]"}, + {1," Define regular expression for locating tags in specific language."}, +#endif + {0," --sort=[yes|no|foldcase]"}, + {0," Should tags be sorted (optionally ignoring case) [yes]?."}, + {0," --tag-relative=[yes|no]"}, + {0," Should paths be relative to location of tag file [no; yes when -e]?"}, + {1," --totals=[yes|no]"}, + {1," Print statistics about source and tag files [no]."}, + {1," --verbose=[yes|no]"}, + {1," Enable verbose messages describing actions on each source file."}, + {1," --version"}, + {1," Print version identifier to standard output."}, + {1, NULL} +}; + +static const char* const License1 = +"This program is free software; you can redistribute it and/or\n" +"modify it under the terms of the GNU General Public License\n" +"as published by the Free Software Foundation; either version 2\n" +"of the License, or (at your option) any later version.\n" +"\n"; +static const char* const License2 = +"This program is distributed in the hope that it will be useful,\n" +"but WITHOUT ANY WARRANTY; without even the implied warranty of\n" 
+"MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n" +"GNU General Public License for more details.\n" +"\n" +"You should have received a copy of the GNU General Public License\n" +"along with this program; if not, write to the Free Software\n" +"Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.\n"; + +/* Contains a set of strings describing the set of "features" compiled into + * the code. + */ +static const char *const Features [] = { +#ifdef WIN32 + "win32", +#endif +#ifdef DJGPP + "msdos_32", +#else +# ifdef MSDOS + "msdos_16", +# endif +#endif +#ifdef OS2 + "os2", +#endif +#ifdef AMIGA + "amiga", +#endif +#ifdef VMS + "vms", +#endif +#ifdef HAVE_FNMATCH + "wildcards", +#endif +#ifdef HAVE_REGEX + "regex", +#endif +#ifndef EXTERNAL_SORT + "internal-sort", +#endif +#ifdef CUSTOM_CONFIGURATION_FILE + "custom-conf", +#endif +#if (defined (MSDOS) || defined (WIN32) || defined (OS2)) && defined (UNIX_PATH_SEPARATOR) + "unix-path-separator", +#endif +#ifdef DEBUG + "debug", +#endif + NULL +}; + +/* +* FUNCTION PROTOTYPES +*/ +static boolean parseFileOptions (const char *const fileName); + +/* +* FUNCTION DEFINITIONS +*/ + +extern void verbose (const char *const format, ...) 
+{ + if (Option.verbose) + { + va_list ap; + va_start (ap, format); + vprintf (format, ap); + va_end (ap); + } +} + +static char *stringCopy (const char *const string) +{ + char* result = NULL; + if (string != NULL) + result = eStrdup (string); + return result; +} + +static void freeString (char **const pString) +{ + if (*pString != NULL) + { + eFree (*pString); + *pString = NULL; + } +} + +extern void freeList (stringList** const pList) +{ + if (*pList != NULL) + { + stringListDelete (*pList); + *pList = NULL; + } +} + +extern void setDefaultTagFileName (void) +{ + if (Option.tagFileName != NULL) + ; /* accept given name */ + else if (Option.etags) + Option.tagFileName = stringCopy (ETAGS_FILE); + else + Option.tagFileName = stringCopy (CTAGS_FILE); +} + +extern boolean filesRequired (void) +{ + boolean result = FilesRequired; + if (Option.recurse) + result = FALSE; + return result; +} + +extern void checkOptions (void) +{ + const char* notice; + if (Option.xref) + { + notice = "xref output"; + if (Option.include.fileNames) + { + error (WARNING, "%s disables file name tags", notice); + Option.include.fileNames = FALSE; + } + } + if (Option.append) + { + notice = "append mode is not compatible with"; + if (isDestinationStdout ()) + error (FATAL, "%s tags to stdout", notice); + } + if (Option.filter) + { + notice = "filter mode"; + if (Option.printTotals) + { + error (WARNING, "%s disables totals", notice); + Option.printTotals = FALSE; + } + if (Option.tagFileName != NULL) + error (WARNING, "%s ignores output tag file name", notice); + } +} + +static void setEtagsMode (void) +{ + Option.etags = TRUE; + Option.sorted = SO_UNSORTED; + Option.lineDirectives = FALSE; + Option.tagRelative = TRUE; +} + +extern void testEtagsInvocation (void) +{ + char* const execName = eStrdup (getExecutableName ()); + char* const etags = eStrdup (ETAGS); +#ifdef CASE_INSENSITIVE_FILENAMES + toLowerString (execName); + toLowerString (etags); +#endif + if (strstr (execName, etags) != 
NULL) + { + verbose ("Running in etags mode\n"); + setEtagsMode (); + } + eFree (execName); + eFree (etags); +} + +/* + * Cooked argument parsing + */ + +static void parseShortOption (cookedArgs *const args) +{ + args->simple [0] = *args->shortOptions++; + args->simple [1] = '\0'; + args->item = args->simple; + if (! isCompoundOption (*args->simple)) + args->parameter = ""; + else if (*args->shortOptions == '\0') + { + argForth (args->args); + if (argOff (args->args)) + args->parameter = NULL; + else + args->parameter = argItem (args->args); + args->shortOptions = NULL; + } + else + { + args->parameter = args->shortOptions; + args->shortOptions = NULL; + } +} + +static void parseLongOption (cookedArgs *const args, const char *item) +{ + const char* const equal = strchr (item, '='); + if (equal == NULL) + { + args->item = eStrdup (item); /* FIXME: memory leak. */ + args->parameter = ""; + } + else + { + const size_t length = equal - item; + args->item = xMalloc (length + 1, char); /* FIXME: memory leak. */ + strncpy (args->item, item, length); + args->item [length] = '\0'; + args->parameter = equal + 1; + } + Assert (args->item != NULL); + Assert (args->parameter != NULL); +} + +static void cArgRead (cookedArgs *const current) +{ + char* item; + + Assert (current != NULL); + if (! 
argOff (current->args)) + { + item = argItem (current->args); + current->shortOptions = NULL; + Assert (item != NULL); + if (strncmp (item, "--", (size_t) 2) == 0) + { + current->isOption = TRUE; + current->longOption = TRUE; + parseLongOption (current, item + 2); + Assert (current->item != NULL); + Assert (current->parameter != NULL); + } + else if (*item == '-') + { + current->isOption = TRUE; + current->longOption = FALSE; + current->shortOptions = item + 1; + parseShortOption (current); + } + else + { + current->isOption = FALSE; + current->longOption = FALSE; + current->item = item; + current->parameter = NULL; + } + } +} + +extern cookedArgs* cArgNewFromString (const char* string) +{ + cookedArgs* const result = xMalloc (1, cookedArgs); + memset (result, 0, sizeof (cookedArgs)); + result->args = argNewFromString (string); + cArgRead (result); + return result; +} + +extern cookedArgs* cArgNewFromArgv (char* const* const argv) +{ + cookedArgs* const result = xMalloc (1, cookedArgs); + memset (result, 0, sizeof (cookedArgs)); + result->args = argNewFromArgv (argv); + cArgRead (result); + return result; +} + +extern cookedArgs* cArgNewFromFile (FILE* const fp) +{ + cookedArgs* const result = xMalloc (1, cookedArgs); + memset (result, 0, sizeof (cookedArgs)); + result->args = argNewFromFile (fp); + cArgRead (result); + return result; +} + +extern cookedArgs* cArgNewFromLineFile (FILE* const fp) +{ + cookedArgs* const result = xMalloc (1, cookedArgs); + memset (result, 0, sizeof (cookedArgs)); + result->args = argNewFromLineFile (fp); + cArgRead (result); + return result; +} + +extern void cArgDelete (cookedArgs* const current) +{ + Assert (current != NULL); + argDelete (current->args); + memset (current, 0, sizeof (cookedArgs)); + eFree (current); +} + +static boolean cArgOptionPending (cookedArgs* const current) +{ + boolean result = FALSE; + if (current->shortOptions != NULL) + if (*current->shortOptions != '\0') + result = TRUE; + return result; +} + +extern 
boolean cArgOff (cookedArgs* const current) +{ + Assert (current != NULL); + return (boolean) (argOff (current->args) && ! cArgOptionPending (current)); +} + +extern boolean cArgIsOption (cookedArgs* const current) +{ + Assert (current != NULL); + return current->isOption; +} + +extern const char* cArgItem (cookedArgs* const current) +{ + Assert (current != NULL); + return current->item; +} + +extern void cArgForth (cookedArgs* const current) +{ + Assert (current != NULL); + Assert (! cArgOff (current)); + if (cArgOptionPending (current)) + parseShortOption (current); + else + { + Assert (! argOff (current->args)); + argForth (current->args); + if (! argOff (current->args)) + cArgRead (current); + else + { + current->isOption = FALSE; + current->longOption = FALSE; + current->shortOptions = NULL; + current->item = NULL; + current->parameter = NULL; + } + } +} + +/* + * File extension and language mapping + */ + +static void addExtensionList ( + stringList *const slist, const char *const elist, const boolean clear) +{ + char *const extensionList = eStrdup (elist); + const char *extension = NULL; + boolean first = TRUE; + + if (clear) + { + verbose (" clearing\n"); + stringListClear (slist); + } + verbose (" adding: "); + if (elist != NULL && *elist != '\0') + { + extension = extensionList; + if (elist [0] == EXTENSION_SEPARATOR) + ++extension; + } + while (extension != NULL) + { + char *separator = strchr (extension, EXTENSION_SEPARATOR); + if (separator != NULL) + *separator = '\0'; + verbose ("%s%s", first ? "" : ", ", + *extension == '\0' ? 
"(NONE)" : extension); + stringListAdd (slist, vStringNewInit (extension)); + first = FALSE; + if (separator == NULL) + extension = NULL; + else + extension = separator + 1; + } + if (Option.verbose) + { + printf ("\n now: "); + stringListPrint (slist); + putchar ('\n'); + } + eFree (extensionList); +} + +static boolean isFalse (const char *parameter) +{ + return (boolean) ( + strcasecmp (parameter, "0" ) == 0 || + strcasecmp (parameter, "n" ) == 0 || + strcasecmp (parameter, "no" ) == 0 || + strcasecmp (parameter, "off") == 0); +} + +static boolean isTrue (const char *parameter) +{ + return (boolean) ( + strcasecmp (parameter, "1" ) == 0 || + strcasecmp (parameter, "y" ) == 0 || + strcasecmp (parameter, "yes") == 0 || + strcasecmp (parameter, "on" ) == 0); +} + +/* Determines whether the specified file name is considered to be a header + * file for the purposes of determining whether enclosed tags are global or + * static. + */ +extern boolean isIncludeFile (const char *const fileName) +{ + boolean result = FALSE; + const char *const extension = fileExtension (fileName); + if (Option.headerExt != NULL) + result = stringListExtensionMatched (Option.headerExt, extension); + return result; +} + +/* + * Specific option processing + */ + +static void processEtagsInclude ( + const char *const option, const char *const parameter) +{ + if (! 
Option.etags) + error (FATAL, "Etags must be enabled to use \"%s\" option", option); + else + { + vString *const file = vStringNewInit (parameter); + if (Option.etagsInclude == NULL) + Option.etagsInclude = stringListNew (); + stringListAdd (Option.etagsInclude, file); + FilesRequired = FALSE; + } +} + +static void processExcludeOption ( + const char *const option __unused, const char *const parameter) +{ + const char *const fileName = parameter + 1; + if (parameter [0] == '\0') + freeList (&Excluded); + else if (parameter [0] == '@') + { + stringList* const sl = stringListNewFromFile (fileName); + if (sl == NULL) + error (FATAL | PERROR, "cannot open \"%s\"", fileName); + if (Excluded == NULL) + Excluded = sl; + else + stringListCombine (Excluded, sl); + verbose (" adding exclude patterns from %s\n", fileName); + } + else + { + vString *const item = vStringNewInit (parameter); + if (Excluded == NULL) + Excluded = stringListNew (); + stringListAdd (Excluded, item); + verbose (" adding exclude pattern: %s\n", parameter); + } +} + +extern boolean isExcludedFile (const char* const name) +{ + const char* base = baseFilename (name); + boolean result = FALSE; + if (Excluded != NULL) + { + result = stringListFileMatched (Excluded, base); + if (! 
result && name != base) + result = stringListFileMatched (Excluded, name); + } + return result; +} + +static void processExcmdOption ( + const char *const option, const char *const parameter) +{ + switch (*parameter) + { + case 'm': Option.locate = EX_MIX; break; + case 'n': Option.locate = EX_LINENUM; break; + case 'p': Option.locate = EX_PATTERN; break; + default: + error (FATAL, "Invalid value for \"%s\" option", option); + break; + } +} + +static void processExtraTagsOption ( + const char *const option, const char *const parameter) +{ + struct sInclude *const inc = &Option.include; + const char *p = parameter; + boolean mode = TRUE; + int c; + + if (*p != '+' && *p != '-') + { + inc->fileNames = FALSE; + inc->qualifiedTags = FALSE; +#if 0 + inc->fileScope = FALSE; +#endif + } + while ((c = *p++) != '\0') switch (c) + { + case '+': mode = TRUE; break; + case '-': mode = FALSE; break; + + case 'f': inc->fileNames = mode; break; + case 'q': inc->qualifiedTags = mode; break; +#if 0 + case 'F': inc->fileScope = mode; break; +#endif + + default: error(WARNING, "Unsupported parameter '%c' for \"%s\" option", + c, option); + break; + } +} + +static void processFieldsOption ( + const char *const option, const char *const parameter) +{ + struct sExtFields *field = &Option.extensionFields; + const char *p = parameter; + boolean mode = TRUE; + int c; + + if (*p != '+' && *p != '-') + { + field->access = FALSE; + field->fileScope = FALSE; + field->implementation = FALSE; + field->inheritance = FALSE; + field->kind = FALSE; + field->kindKey = FALSE; + field->kindLong = FALSE; + field->language = FALSE; + field->scope = FALSE; + field->typeRef = FALSE; + } + while ((c = *p++) != '\0') switch (c) + { + case '+': mode = TRUE; break; + case '-': mode = FALSE; break; + + case 'a': field->access = mode; break; + case 'f': field->fileScope = mode; break; + case 'm': field->implementation = mode; break; + case 'i': field->inheritance = mode; break; + case 'k': field->kind = mode; 
break; + case 'K': field->kindLong = mode; break; + case 'l': field->language = mode; break; + case 'n': field->lineNumber = mode; break; + case 's': field->scope = mode; break; + case 'S': field->signature = mode; break; + case 'z': field->kindKey = mode; break; + case 't': field->typeRef = mode; break; + + default: error(WARNING, "Unsupported parameter '%c' for \"%s\" option", + c, option); + break; + } +} + +static void processFilterTerminatorOption ( + const char *const option __unused, const char *const parameter) +{ + freeString (&Option.filterTerminator); + Option.filterTerminator = stringCopy (parameter); +} + +static void processFormatOption ( + const char *const option, const char *const parameter) +{ + unsigned int format; + + if (sscanf (parameter, "%u", &format) < 1) + error (FATAL, "Invalid value for \"%s\" option",option); + else if (format <= (unsigned int) MaxSupportedTagFormat) + Option.tagFileFormat = format; + else + error (FATAL, "Unsupported value for \"%s\" option", option); +} + +static void printInvocationDescription (void) +{ + printf (INVOCATION, getExecutableName ()); +} + +static void printOptionDescriptions (const optionDescription *const optDesc) +{ + int i; + for (i = 0 ; optDesc [i].description != NULL ; ++i) + { + if (! Option.etags || optDesc [i].usedByEtags) + puts (optDesc [i].description); + } +} + +static void printFeatureList (void) +{ + int i; + + for (i = 0 ; Features [i] != NULL ; ++i) + { + if (i == 0) + printf (" Optional compiled features: "); + printf ("%s+%s", (i>0 ? 
", " : ""), Features [i]); +#ifdef CUSTOM_CONFIGURATION_FILE + if (strcmp (Features [i], "custom-conf") == 0) + printf ("=%s", CUSTOM_CONFIGURATION_FILE); +#endif + } + if (i > 0) + putchar ('\n'); +} + +static void printProgramIdentification (void) +{ + printf ("%s %s, %s %s\n", + PROGRAM_NAME, PROGRAM_VERSION, + PROGRAM_COPYRIGHT, AUTHOR_NAME); + printf (" Addresses: <%s>, %s\n", AUTHOR_EMAIL, PROGRAM_URL); + printFeatureList (); +} + +static void processHelpOption ( + const char *const option __unused, + const char *const parameter __unused) +{ + printProgramIdentification (); + putchar ('\n'); + printInvocationDescription (); + putchar ('\n'); + printOptionDescriptions (LongOptionDescription); + exit (0); +} + +static void processLanguageForceOption ( + const char *const option, const char *const parameter) +{ + langType language; + if (strcasecmp (parameter, "auto") == 0) + language = LANG_AUTO; + else + language = getNamedLanguage (parameter); + + if (strcmp (option, "lang") == 0 || strcmp (option, "language") == 0) + error (WARNING, + "\"--%s\" option is obsolete; use \"--language-force\" instead", + option); + if (language == LANG_IGNORE) + error (FATAL, "Unknown language \"%s\" in \"%s\" option", parameter, option); + else + Option.language = language; +} +static char* skipPastMap (char* p) +{ + while (*p != EXTENSION_SEPARATOR && + *p != PATTERN_START && *p != ',' && *p != '\0') + ++p; + return p; +} + +/* Parses the mapping beginning at `map', adds it to the language map, and + * returns first character past the map. 
+ */ +static char* addLanguageMap (const langType language, char* map) +{ + char* p = NULL; + const char first = *map; + if (first == EXTENSION_SEPARATOR) /* extension map */ + { + ++map; + p = skipPastMap (map); + if (*p == '\0') + { + verbose (" .%s", map); + addLanguageExtensionMap (language, map); + p = map + strlen (map); + } + else + { + const char separator = *p; + *p = '\0'; + verbose (" .%s", map); + addLanguageExtensionMap (language, map); + *p = separator; + } + } + else if (first == PATTERN_START) /* pattern map */ + { + ++map; + for (p = map ; *p != PATTERN_STOP && *p != '\0' ; ++p) + { + if (*p == '\\' && *(p + 1) == PATTERN_STOP) + ++p; + } + if (*p == '\0') + error (FATAL, "Unterminated file name pattern for %s language", + getLanguageName (language)); + else + { + *p++ = '\0'; + verbose (" (%s)", map); + addLanguagePatternMap (language, map); + } + } + else + error (FATAL, "Badly formed language map for %s language", + getLanguageName (language)); + return p; +} + +static char* processLanguageMap (char* map) +{ + char* const separator = strchr (map, ':'); + char* result = NULL; + if (separator != NULL) + { + langType language; + char *list = separator + 1; + boolean clear = FALSE; + *separator = '\0'; + language = getNamedLanguage (map); + if (language != LANG_IGNORE) + { + const char *const deflt = "default"; + char* p; + if (*list == '+') + ++list; + else + clear = TRUE; + for (p = list ; *p != ',' && *p != '\0' ; ++p) /*no-op*/ ; + if ((size_t) (p - list) == strlen (deflt) && + strncasecmp (list, deflt, p - list) == 0) + { + verbose (" Restoring default %s language map: ", getLanguageName (language)); + installLanguageMapDefault (language); + list = p; + } + else + { + if (clear) + { + verbose (" Setting %s language map:", getLanguageName (language)); + clearLanguageMap (language); + } + else + verbose (" Adding to %s language map:", getLanguageName (language)); + while (list != NULL && *list != '\0' && *list != ',') + list = addLanguageMap 
(language, list); + verbose ("\n"); + } + if (list != NULL && *list == ',') + ++list; + result = list; + } + } + return result; +} + +static void processLanguageMapOption ( + const char *const option, const char *const parameter) +{ + char *const maps = eStrdup (parameter); + char *map = maps; + + if (strcmp (parameter, "default") == 0) + { + verbose (" Restoring default language maps:\n"); + installLanguageMapDefaults (); + } + else while (map != NULL && *map != '\0') + { + char* const next = processLanguageMap (map); + if (next == NULL) + error (WARNING, "Unknown language \"%s\" in \"%s\" option", parameter, option); + map = next; + } + eFree (maps); +} + +static void processLanguagesOption ( + const char *const option, const char *const parameter) +{ + char *const langs = eStrdup (parameter); + enum { Add, Remove, Replace } mode = Replace; + boolean first = TRUE; + char *lang = langs; + const char* prefix = ""; + verbose (" Enabled languages: "); + while (lang != NULL) + { + char *const end = strchr (lang, ','); + if (lang [0] == '+') + { + ++lang; + mode = Add; + prefix = "+ "; + } + else if (lang [0] == '-') + { + ++lang; + mode = Remove; + prefix = "- "; + } + if (mode == Replace) + enableLanguages (FALSE); + if (end != NULL) + *end = '\0'; + if (lang [0] != '\0') + { + if (strcmp (lang, "all") == 0) + enableLanguages ((boolean) (mode != Remove)); + else + { + const langType language = getNamedLanguage (lang); + if (language == LANG_IGNORE) + error (WARNING, "Unknown language \"%s\" in \"%s\" option", lang, option); + else + enableLanguage (language, (boolean) (mode != Remove)); + } + verbose ("%s%s%s", (first ? "" : ", "), prefix, lang); + prefix = ""; + first = FALSE; + if (mode == Replace) + mode = Add; + } + lang = (end != NULL ? 
end + 1 : NULL); + } + verbose ("\n"); + eFree (langs); +} + +static void processLicenseOption ( + const char *const option __unused, + const char *const parameter __unused) +{ + printProgramIdentification (); + puts (""); + puts (License1); + puts (License2); + exit (0); +} + +static void processListKindsOption ( + const char *const option, const char *const parameter) +{ + if (parameter [0] == '\0' || strcasecmp (parameter, "all") == 0) + printLanguageKinds (LANG_AUTO); + else + { + langType language = getNamedLanguage (parameter); + if (language == LANG_IGNORE) + error (FATAL, "Unknown language \"%s\" in \"%s\" option", parameter, option); + else + printLanguageKinds (language); + } + exit (0); +} + +static void processListMapsOption ( + const char *const __unused option, + const char *const __unused parameter) +{ + if (parameter [0] == '\0' || strcasecmp (parameter, "all") == 0) + printLanguageMaps (LANG_AUTO); + else + { + langType language = getNamedLanguage (parameter); + if (language == LANG_IGNORE) + error (FATAL, "Unknown language \"%s\" in \"%s\" option", parameter, option); + else + printLanguageMaps (language); + } + exit (0); +} + +static void processListLanguagesOption ( + const char *const option __unused, + const char *const parameter __unused) +{ + printLanguageList (); + exit (0); +} + +static void processOptionFile ( + const char *const option, const char *const parameter) +{ + if (parameter [0] == '\0') + error (WARNING, "no option file supplied for \"%s\"", option); + else if (! 
parseFileOptions (parameter)) + error (FATAL | PERROR, "cannot open option file \"%s\"", parameter); +} + +static void processSortOption ( + const char *const option, const char *const parameter) +{ + if (isFalse (parameter)) + Option.sorted = SO_UNSORTED; + else if (isTrue (parameter)) + Option.sorted = SO_SORTED; + else if (strcasecmp (parameter, "f") == 0 || + strcasecmp (parameter, "fold") == 0 || + strcasecmp (parameter, "foldcase") == 0) + Option.sorted = SO_FOLDSORTED; + else + error (FATAL, "Invalid value for \"%s\" option", option); +} + +static void installHeaderListDefaults (void) +{ + Option.headerExt = stringListNewFromArgv (HeaderExtensions); + if (Option.verbose) + { + printf (" Setting default header extensions: "); + stringListPrint (Option.headerExt); + putchar ('\n'); + } +} + +static void processHeaderListOption (const int option, const char *parameter) +{ + /* Check to make sure that the user did not enter "ctags -h *.c" + * by testing to see if the list is a filename that exists. + */ + if (doesFileExist (parameter)) + error (FATAL, "-%c: Invalid list", option); + if (strcmp (parameter, "default") == 0) + installHeaderListDefaults (); + else + { + boolean clear = TRUE; + + if (parameter [0] == '+') + { + ++parameter; + clear = FALSE; + } + if (Option.headerExt == NULL) + Option.headerExt = stringListNew (); + verbose (" Header Extensions:\n"); + addExtensionList (Option.headerExt, parameter, clear); + } +} + +/* + * Token ignore processing + */ + +/* Determines whether or not "name" should be ignored, per the ignore list. 
+ */ +extern boolean isIgnoreToken ( + const char *const name, boolean *const pIgnoreParens, + const char **const replacement) +{ + boolean result = FALSE; + + if (Option.ignore != NULL) + { + const size_t nameLen = strlen (name); + unsigned int i; + + if (pIgnoreParens != NULL) + *pIgnoreParens = FALSE; + + for (i = 0 ; i < stringListCount (Option.ignore) ; ++i) + { + vString *token = stringListItem (Option.ignore, i); + + if (strncmp (vStringValue (token), name, nameLen) == 0) + { + const size_t tokenLen = vStringLength (token); + + if (nameLen == tokenLen) + { + result = TRUE; + break; + } + else if (tokenLen == nameLen + 1 && + vStringChar (token, tokenLen - 1) == '+') + { + result = TRUE; + if (pIgnoreParens != NULL) + *pIgnoreParens = TRUE; + break; + } + else if (vStringChar (token, nameLen) == '=') + { + if (replacement != NULL) + *replacement = vStringValue (token) + nameLen + 1; + break; + } + } + } + } + return result; +} + +static void saveIgnoreToken (vString *const ignoreToken) +{ + if (Option.ignore == NULL) + Option.ignore = stringListNew (); + stringListAdd (Option.ignore, ignoreToken); + verbose (" ignore token: %s\n", vStringValue (ignoreToken)); +} + +static void readIgnoreList (const char *const list) +{ + char* newList = stringCopy (list); + const char *token = strtok (newList, IGNORE_SEPARATORS); + + while (token != NULL) + { + vString *const entry = vStringNewInit (token); + + saveIgnoreToken (entry); + token = strtok (NULL, IGNORE_SEPARATORS); + } + eFree (newList); +} + +static void addIgnoreListFromFile (const char *const fileName) +{ + stringList* tokens = stringListNewFromFile (fileName); + if (tokens == NULL) + error (FATAL | PERROR, "cannot open \"%s\"", fileName); + if (Option.ignore == NULL) + Option.ignore = tokens; + else + stringListCombine (Option.ignore, tokens); +} + +static void processIgnoreOption (const char *const list) +{ + if (strchr ("@./\\", list [0]) != NULL) + { + const char* fileName = (*list == '@') ? 
list + 1 : list; + addIgnoreListFromFile (fileName); + } +#if defined (MSDOS) || defined (WIN32) || defined (OS2) + else if (isalpha (list [0]) && list [1] == ':') + addIgnoreListFromFile (list); +#endif + else if (strcmp (list, "-") == 0) + { + freeList (&Option.ignore); + verbose (" clearing list\n"); + } + else + readIgnoreList (list); +} + +static void processVersionOption ( + const char *const option __unused, + const char *const parameter __unused) +{ + printProgramIdentification (); + exit (0); +} + +/* + * Option tables + */ + +static parametricOption ParametricOptions [] = { + { "etags-include", processEtagsInclude, FALSE }, + { "exclude", processExcludeOption, FALSE }, + { "excmd", processExcmdOption, FALSE }, + { "extra", processExtraTagsOption, FALSE }, + { "fields", processFieldsOption, FALSE }, + { "filter-terminator", processFilterTerminatorOption, TRUE }, + { "format", processFormatOption, TRUE }, + { "help", processHelpOption, TRUE }, + { "lang", processLanguageForceOption, FALSE }, + { "language", processLanguageForceOption, FALSE }, + { "language-force", processLanguageForceOption, FALSE }, + { "languages", processLanguagesOption, FALSE }, + { "langdef", processLanguageDefineOption, FALSE }, + { "langmap", processLanguageMapOption, FALSE }, + { "license", processLicenseOption, TRUE }, + { "list-kinds", processListKindsOption, TRUE }, + { "list-maps", processListMapsOption, TRUE }, + { "list-languages", processListLanguagesOption, TRUE }, + { "options", processOptionFile, FALSE }, + { "sort", processSortOption, TRUE }, + { "version", processVersionOption, TRUE }, +}; + +static booleanOption BooleanOptions [] = { + { "append", &Option.append, TRUE }, + { "file-scope", &Option.include.fileScope, FALSE }, + { "file-tags", &Option.include.fileNames, FALSE }, + { "filter", &Option.filter, TRUE }, + { "if0", &Option.if0, FALSE }, + { "kind-long", &Option.kindLong, TRUE }, + { "line-directives",&Option.lineDirectives, FALSE }, + { "links", 
&Option.followLinks, FALSE }, +#ifdef RECURSE_SUPPORTED + { "recurse", &Option.recurse, FALSE }, +#endif + { "tag-relative", &Option.tagRelative, TRUE }, + { "totals", &Option.printTotals, TRUE }, + { "verbose", &Option.verbose, FALSE }, +}; + +/* + * Generic option parsing + */ + +static void checkOptionOrder (const char* const option) +{ + if (NonOptionEncountered) + error (FATAL, "-%s option may not follow a file name", option); +} + +static boolean processParametricOption ( + const char *const option, const char *const parameter) +{ + const int count = sizeof (ParametricOptions) / sizeof (parametricOption); + boolean found = FALSE; + int i; + + for (i = 0 ; i < count && ! found ; ++i) + { + parametricOption* const entry = &ParametricOptions [i]; + if (strcmp (option, entry->name) == 0) + { + found = TRUE; + if (entry->initOnly) + checkOptionOrder (option); + (entry->handler) (option, parameter); + } + } + return found; +} + +static boolean getBooleanOption ( + const char *const option, const char *const parameter) +{ + boolean selection = TRUE; + + if (parameter [0] == '\0') + selection = TRUE; + else if (isFalse (parameter)) + selection = FALSE; + else if (isTrue (parameter)) + selection = TRUE; + else + error (FATAL, "Invalid value for \"%s\" option", option); + + return selection; +} + +static boolean processBooleanOption ( + const char *const option, const char *const parameter) +{ + const int count = sizeof (BooleanOptions) / sizeof (booleanOption); + boolean found = FALSE; + int i; + + for (i = 0 ; i < count && ! 
found ; ++i) + { + booleanOption* const entry = &BooleanOptions [i]; + if (strcmp (option, entry->name) == 0) + { + found = TRUE; + if (entry->initOnly) + checkOptionOrder (option); + *entry->pValue = getBooleanOption (option, parameter); + } + } + return found; +} + +static void processLongOption ( + const char *const option, const char *const parameter) +{ + Assert (parameter != NULL); + if (parameter == NULL && parameter [0] == '\0') + verbose (" Option: --%s\n", option); + else + verbose (" Option: --%s=%s\n", option, parameter); + + if (processBooleanOption (option, parameter)) + ; + else if (processParametricOption (option, parameter)) + ; + else if (processKindOption (option, parameter)) + ; + else if (processRegexOption (option, parameter)) + ; +#ifndef RECURSE_SUPPORTED + else if (strcmp (option, "recurse") == 0) + error (WARNING, "%s option not supported on this host", option); +#endif + else + error (FATAL, "Unknown option: --%s", option); +} + +static void processShortOption ( + const char *const option, const char *const parameter) +{ + if (parameter == NULL || parameter [0] == '\0') + verbose (" Option: -%s\n", option); + else + verbose (" Option: -%s %s\n", option, parameter); + + if (isCompoundOption (*option) && (parameter == NULL || parameter [0] == '\0')) + error (FATAL, "Missing parameter for \"%s\" option", option); + else switch (*option) + { + case '?': + processHelpOption ("?", NULL); + exit (0); + break; + case 'a': + checkOptionOrder (option); + Option.append = TRUE; + break; +#ifdef DEBUG + case 'b': + if (atol (parameter) < 0) + error (FATAL, "-%s: Invalid line number", option); + Option.breakLine = atol (parameter); + break; + case 'D': + Option.debugLevel = strtol (parameter, NULL, 0); + if (debug (DEBUG_STATUS)) + Option.verbose = TRUE; + break; +#endif + case 'B': + Option.backward = TRUE; + break; + case 'e': + checkOptionOrder (option); + setEtagsMode (); + break; + case 'f': + case 'o': + checkOptionOrder (option); + if 
(Option.tagFileName != NULL) + { + error (WARNING, + "-%s option specified more than once, last value used", + option); + freeString (&Option.tagFileName); + } + else if (parameter [0] == '-' && parameter [1] != '\0') + error (FATAL, "output file name may not begin with a '-'"); + Option.tagFileName = stringCopy (parameter); + break; + case 'F': + Option.backward = FALSE; + break; + case 'h': + processHeaderListOption (*option, parameter); + break; + case 'I': + processIgnoreOption (parameter); + break; + case 'L': + if (Option.fileList != NULL) + { + error (WARNING, + "-%s option specified more than once, last value used", + option); + freeString (&Option.fileList); + } + Option.fileList = stringCopy (parameter); + break; + case 'n': + Option.locate = EX_LINENUM; + break; + case 'N': + Option.locate = EX_PATTERN; + break; + case 'R': +#ifdef RECURSE_SUPPORTED + Option.recurse = TRUE; +#else + error (WARNING, "-%s option not supported on this host", option); +#endif + break; + case 'u': + checkOptionOrder (option); + Option.sorted = SO_UNSORTED; + break; + case 'V': + Option.verbose = TRUE; + break; + case 'w': + /* silently ignored */ + break; + case 'x': + checkOptionOrder (option); + Option.xref = TRUE; + break; + default: + error (FATAL, "Unknown option: -%s", option); + break; + } +} + +extern void parseOption (cookedArgs* const args) +{ + Assert (! cArgOff (args)); + if (args->isOption) + { + if (args->longOption) + processLongOption (args->item, args->parameter); + else + { + const char *parameter = args->parameter; + while (*parameter == ' ') + ++parameter; + processShortOption (args->item, parameter); + } + cArgForth (args); + } +} + +extern void parseOptions (cookedArgs* const args) +{ + NonOptionEncountered = FALSE; + while (! cArgOff (args) && cArgIsOption (args)) + parseOption (args); + if (! cArgOff (args) && ! 
cArgIsOption (args)) + NonOptionEncountered = TRUE; +} + +static const char *CheckFile; +static boolean checkSameFile (const char *const fileName) +{ + return isSameFile (CheckFile, fileName); +} + +static boolean parseFileOptions (const char* const fileName) +{ + boolean fileFound = FALSE; + const char* const format = "Considering option file %s: %s\n"; + CheckFile = fileName; + if (stringListHasTest (OptionFiles, checkSameFile)) + verbose (format, fileName, "already considered"); + else + { + FILE* const fp = fopen (fileName, "r"); + if (fp == NULL) + verbose (format, fileName, "not found"); + else + { + cookedArgs* const args = cArgNewFromLineFile (fp); + vString* file = vStringNewInit (fileName); + stringListAdd (OptionFiles, file); + verbose (format, fileName, "reading..."); + parseOptions (args); + if (NonOptionEncountered) + error (WARNING, "Ignoring non-option in %s\n", fileName); + cArgDelete (args); + fclose (fp); + fileFound = TRUE; + } + } + return fileFound; +} + +/* Actions to be taken before reading any other options */ +extern void previewFirstOption (cookedArgs* const args) +{ + while (cArgIsOption (args)) + { + if (strcmp (args->item, "V") == 0 || strcmp (args->item, "verbose") == 0) + parseOption (args); + else if (strcmp (args->item, "options") == 0 && + strcmp (args->parameter, "NONE") == 0) + { + fprintf (stderr, "No options will be read from files or environment\n"); + SkipConfiguration = TRUE; + cArgForth (args); + } + else + break; + } +} + +static void parseConfigurationFileOptionsInDirectoryWithLeafname (const char* directory, const char* leafname) +{ + vString* const pathname = combinePathAndFile (directory, leafname); + parseFileOptions (vStringValue (pathname)); + vStringDelete (pathname); +} + +static void parseConfigurationFileOptionsInDirectory (const char* directory) +{ + parseConfigurationFileOptionsInDirectoryWithLeafname (directory, ".ctags"); +#ifdef MSDOS_STYLE_PATH + parseConfigurationFileOptionsInDirectoryWithLeafname 
(directory, "ctags.cnf"); +#endif +} + +static void parseConfigurationFileOptions (void) +{ + /* We parse .ctags on all systems, and additionally ctags.cnf on DOS. */ + const char* const home = getenv ("HOME"); +#ifdef CUSTOM_CONFIGURATION_FILE + parseFileOptions (CUSTOM_CONFIGURATION_FILE); +#endif +#ifdef MSDOS_STYLE_PATH + parseFileOptions ("/ctags.cnf"); +#endif + parseFileOptions ("/etc/ctags.conf"); + parseFileOptions ("/usr/local/etc/ctags.conf"); + if (home != NULL) + { + parseConfigurationFileOptionsInDirectory (home); + } + else + { +#ifdef MSDOS_STYLE_PATH + /* + * Windows users don't usually set HOME. + * The OS sets HOMEDRIVE and HOMEPATH for them. + */ + const char* homeDrive = getenv ("HOMEDRIVE"); + const char* homePath = getenv ("HOMEPATH"); + if (homeDrive != NULL && homePath != NULL) + { + vString* const windowsHome = vStringNew (); + vStringCatS (windowsHome, homeDrive); + vStringCatS (windowsHome, homePath); + parseConfigurationFileOptionsInDirectory (vStringValue (windowsHome)); + vStringDelete (windowsHome); + } +#endif + } + parseConfigurationFileOptionsInDirectory ("."); +} + +static void parseEnvironmentOptions (void) +{ + const char *envOptions = NULL; + const char* var = NULL; + + if (Option.etags) + { + var = ETAGS_ENVIRONMENT; + envOptions = getenv (var); + } + if (envOptions == NULL) + { + var = CTAGS_ENVIRONMENT; + envOptions = getenv (var); + } + if (envOptions != NULL && envOptions [0] != '\0') + { + cookedArgs* const args = cArgNewFromString (envOptions); + verbose ("Reading options from $CTAGS\n"); + parseOptions (args); + cArgDelete (args); + if (NonOptionEncountered) + error (WARNING, "Ignoring non-option in %s variable", var); + } +} + +extern void readOptionConfiguration (void) +{ + if (! 
SkipConfiguration) + { + parseConfigurationFileOptions (); + parseEnvironmentOptions (); + } +} + +/* +* Option initialization +*/ + +extern void initOptions (void) +{ + OptionFiles = stringListNew (); + verbose ("Setting option defaults\n"); + installHeaderListDefaults (); + verbose (" Installing default language mappings:\n"); + installLanguageMapDefaults (); + + /* always excluded by default */ + verbose (" Installing default exclude patterns:\n"); + processExcludeOption (NULL, "{arch}"); + processExcludeOption (NULL, ".arch-ids"); + processExcludeOption (NULL, ".arch-inventory"); + processExcludeOption (NULL, "autom4te.cache"); + processExcludeOption (NULL, "BitKeeper"); + processExcludeOption (NULL, ".bzr"); + processExcludeOption (NULL, ".bzrignore"); + processExcludeOption (NULL, "CVS"); + processExcludeOption (NULL, ".cvsignore"); + processExcludeOption (NULL, "_darcs"); + processExcludeOption (NULL, ".deps"); + processExcludeOption (NULL, "EIFGEN"); + processExcludeOption (NULL, ".git"); + processExcludeOption (NULL, ".hg"); + processExcludeOption (NULL, "PENDING"); + processExcludeOption (NULL, "RCS"); + processExcludeOption (NULL, "RESYNC"); + processExcludeOption (NULL, "SCCS"); + processExcludeOption (NULL, ".svn"); +} + +extern void freeOptionResources (void) +{ + freeString (&Option.tagFileName); + freeString (&Option.fileList); + freeString (&Option.filterTerminator); + + freeList (&Excluded); + freeList (&Option.ignore); + freeList (&Option.headerExt); + freeList (&Option.etagsInclude); + freeList (&OptionFiles); +} + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/third_party/ctags/options.h b/third_party/ctags/options.h new file mode 100644 index 000000000..b11b2e0e8 --- /dev/null +++ b/third_party/ctags/options.h @@ -0,0 +1,155 @@ +// clang-format off +/* +* $Id: options.h 443 2006-05-30 04:37:13Z darren $ +* +* Copyright (c) 1998-2003, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU 
General Public License. +* +* Defines external interface to option processing. +*/ +#ifndef _OPTIONS_H +#define _OPTIONS_H + +#if defined(OPTION_WRITE) || defined(VAXC) +# define CONST_OPTION +#else +# define CONST_OPTION const +#endif + +/* +* INCLUDE FILES +*/ +#include "third_party/ctags/general.h" /* must always come first */ + + + +#include "third_party/ctags/args.h" +#include "third_party/ctags/parse.h" +#include "third_party/ctags/strlist.h" +#include "third_party/ctags/vstring.h" + +/* +* DATA DECLARATIONS +*/ + +typedef enum { OPTION_NONE, OPTION_SHORT, OPTION_LONG } optionType; + +typedef struct sCookedArgs { + /* private */ + Arguments* args; + char *shortOptions; + char simple[2]; + boolean isOption; + boolean longOption; + const char* parameter; + /* public */ + char* item; +} cookedArgs; + +typedef enum eLocate { + EX_MIX, /* line numbers for defines, patterns otherwise */ + EX_LINENUM, /* -n only line numbers in tag file */ + EX_PATTERN /* -N only patterns in tag file */ +} exCmd; + +typedef enum sortType { + SO_UNSORTED, + SO_SORTED, + SO_FOLDSORTED +} sortType; + +struct sInclude { + boolean fileNames; /* include tags for source file names */ + boolean qualifiedTags; /* include tags for qualified class members */ + boolean fileScope; /* include tags of file scope only */ +}; + +struct sExtFields { /* extension field content control */ + boolean access; + boolean fileScope; + boolean implementation; + boolean inheritance; + boolean kind; + boolean kindKey; + boolean kindLong; + boolean language; + boolean lineNumber; + boolean scope; + boolean signature; + boolean typeRef; +}; + +/* This stores the command line options. 
+ */ +typedef struct sOptionValues { + struct sInclude include;/* --extra extra tag inclusion */ + struct sExtFields extensionFields;/* --fields extension field control */ + stringList* ignore; /* -I name of file containing tokens to ignore */ + boolean append; /* -a append to "tags" file */ + boolean backward; /* -B regexp patterns search backwards */ + boolean etags; /* -e output Emacs style tags file */ + exCmd locate; /* --excmd EX command used to locate tag */ + boolean recurse; /* -R recurse into directories */ + sortType sorted; /* -u,--sort sort tags */ + boolean verbose; /* -V verbose */ + boolean xref; /* -x generate xref output instead */ + char *fileList; /* -L name of file containing names of files */ + char *tagFileName; /* -o name of tags file */ + stringList* headerExt; /* -h header extensions */ + stringList* etagsInclude;/* --etags-include list of TAGS files to include*/ + unsigned int tagFileFormat;/* --format tag file format (level) */ + boolean if0; /* --if0 examine code within "#if 0" branch */ + boolean kindLong; /* --kind-long */ + langType language; /* --lang specified language override */ + boolean followLinks; /* --link follow symbolic links? */ + boolean filter; /* --filter behave as filter: files in, tags out */ + char* filterTerminator; /* --filter-terminator string to output */ + boolean tagRelative; /* --tag-relative file paths relative to tag file */ + boolean printTotals; /* --totals print cumulative statistics */ + boolean lineDirectives; /* --linedirectives process #line directives */ +#ifdef DEBUG + long debugLevel; /* -D debugging output */ + unsigned long breakLine;/* -b source line at which to call lineBreak() */ +#endif +} optionValues; + +/* +* GLOBAL VARIABLES +*/ +extern CONST_OPTION optionValues Option; + +/* +* FUNCTION PROTOTYPES +*/ +extern void verbose (const char *const format, ...) 
__printf (1, 2); +extern void freeList (stringList** const pString); +extern void setDefaultTagFileName (void); +extern void checkOptions (void); +extern boolean filesRequired (void); +extern void testEtagsInvocation (void); + +extern cookedArgs* cArgNewFromString (const char* string); +extern cookedArgs* cArgNewFromArgv (char* const* const argv); +extern cookedArgs* cArgNewFromFile (FILE* const fp); +extern cookedArgs* cArgNewFromLineFile (FILE* const fp); +extern void cArgDelete (cookedArgs* const current); +extern boolean cArgOff (cookedArgs* const current); +extern boolean cArgIsOption (cookedArgs* const current); +extern const char* cArgItem (cookedArgs* const current); +extern void cArgForth (cookedArgs* const current); + +extern boolean isExcludedFile (const char* const name); +extern boolean isIncludeFile (const char *const fileName); +extern boolean isIgnoreToken (const char *const name, boolean *const pIgnoreParens, const char **const replacement); +extern void parseOption (cookedArgs* const cargs); +extern void parseOptions (cookedArgs* const cargs); +extern void previewFirstOption (cookedArgs* const cargs); +extern void readOptionConfiguration (void); +extern void initOptions (void); +extern void freeOptionResources (void); + +#endif /* _OPTIONS_H */ + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/third_party/ctags/parse.c b/third_party/ctags/parse.c new file mode 100644 index 000000000..692d57e93 --- /dev/null +++ b/third_party/ctags/parse.c @@ -0,0 +1,672 @@ +// clang-format off +/* +* $Id: parse.c 597 2007-07-31 05:35:30Z dhiebert $ +* +* Copyright (c) 1996-2003, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains functions for managing source languages and +* dispatching files to the appropriate language parser. 
+*/ + +/* +* INCLUDE FILES +*/ +#include "third_party/ctags/general.h" /* must always come first */ + +#include "libc/mem/alg.h" +#include "libc/str/str.h" + +#include "third_party/ctags/debug.h" +#include "third_party/ctags/entry.h" +#include "third_party/ctags/main.h" +#define OPTION_WRITE +#include "third_party/ctags/options.h" +#include "third_party/ctags/parsers.h" +#include "third_party/ctags/read.h" +#include "third_party/ctags/routines.h" +#include "third_party/ctags/vstring.h" + +/* +* DATA DEFINITIONS +*/ +static parserDefinitionFunc* BuiltInParsers[] = { PARSER_LIST }; +static parserDefinition** LanguageTable = NULL; +static unsigned int LanguageCount = 0; + +/* +* FUNCTION DEFINITIONS +*/ + +extern void makeSimpleTag ( + const vString* const name, kindOption* const kinds, const int kind) +{ + if (kinds [kind].enabled && name != NULL && vStringLength (name) > 0) + { + tagEntryInfo e; + initTagEntry (&e, vStringValue (name)); + + e.kindName = kinds [kind].name; + e.kind = kinds [kind].letter; + + makeTagEntry (&e); + } +} + +/* +* parserDescription mapping management +*/ + +extern parserDefinition* parserNew (const char* name) +{ + parserDefinition* result = xCalloc (1, parserDefinition); + result->name = eStrdup (name); + return result; +} + +extern const char *getLanguageName (const langType language) +{ + const char* result; + if (language == LANG_IGNORE) + result = "unknown"; + else + { + Assert (0 <= language && language < (int) LanguageCount); + result = LanguageTable [language]->name; + } + return result; +} + +extern langType getNamedLanguage (const char *const name) +{ + langType result = LANG_IGNORE; + unsigned int i; + Assert (name != NULL); + for (i = 0 ; i < LanguageCount && result == LANG_IGNORE ; ++i) + { + const parserDefinition* const lang = LanguageTable [i]; + if (lang->name != NULL) + if (strcasecmp (name, lang->name) == 0) + result = i; + } + return result; +} + +static langType getExtensionLanguage (const char *const extension) +{ + 
langType result = LANG_IGNORE; + unsigned int i; + for (i = 0 ; i < LanguageCount && result == LANG_IGNORE ; ++i) + { + stringList* const exts = LanguageTable [i]->currentExtensions; + if (exts != NULL && stringListExtensionMatched (exts, extension)) + result = i; + } + return result; +} + +static langType getPatternLanguage (const char *const fileName) +{ + langType result = LANG_IGNORE; + const char* base = baseFilename (fileName); + unsigned int i; + for (i = 0 ; i < LanguageCount && result == LANG_IGNORE ; ++i) + { + stringList* const ptrns = LanguageTable [i]->currentPatterns; + if (ptrns != NULL && stringListFileMatched (ptrns, base)) + result = i; + } + return result; +} + +#ifdef SYS_INTERPRETER + +/* The name of the language interpreter, either directly or as the argument + * to "env". + */ +static vString* determineInterpreter (const char* const cmd) +{ + vString* const interpreter = vStringNew (); + const char* p = cmd; + do + { + vStringClear (interpreter); + for ( ; isspace ((int) *p) ; ++p) + ; /* no-op */ + for ( ; *p != '\0' && ! isspace ((int) *p) ; ++p) + vStringPut (interpreter, (int) *p); + vStringTerminate (interpreter); + } while (strcmp (vStringValue (interpreter), "env") == 0); + return interpreter; +} + +static langType getInterpreterLanguage (const char *const fileName) +{ + langType result = LANG_IGNORE; + FILE* const fp = fopen (fileName, "r"); + if (fp != NULL) + { + vString* const vLine = vStringNew (); + const char* const line = readLine (vLine, fp); + if (line != NULL && line [0] == '#' && line [1] == '!') + { + const char* const lastSlash = strrchr (line, '/'); + const char *const cmd = lastSlash != NULL ? 
lastSlash+1 : line+2; + vString* const interpreter = determineInterpreter (cmd); + result = getExtensionLanguage (vStringValue (interpreter)); + if (result == LANG_IGNORE) + result = getNamedLanguage (vStringValue (interpreter)); + vStringDelete (interpreter); + } + vStringDelete (vLine); + fclose (fp); + } + return result; +} + +#endif + +extern langType getFileLanguage (const char *const fileName) +{ + langType language = Option.language; + if (language == LANG_AUTO) + { + language = getExtensionLanguage (fileExtension (fileName)); + if (language == LANG_IGNORE) + language = getPatternLanguage (fileName); +#ifdef SYS_INTERPRETER + if (language == LANG_IGNORE) + { + fileStatus *status = eStat (fileName); + if (status->isExecutable) + language = getInterpreterLanguage (fileName); + } +#endif + } + return language; +} + +extern void printLanguageMap (const langType language) +{ + boolean first = TRUE; + unsigned int i; + stringList* map = LanguageTable [language]->currentPatterns; + Assert (0 <= language && language < (int) LanguageCount); + for (i = 0 ; map != NULL && i < stringListCount (map) ; ++i) + { + printf ("%s(%s)", (first ? "" : " "), + vStringValue (stringListItem (map, i))); + first = FALSE; + } + map = LanguageTable [language]->currentExtensions; + for (i = 0 ; map != NULL && i < stringListCount (map) ; ++i) + { + printf ("%s.%s", (first ? 
"" : " "), + vStringValue (stringListItem (map, i))); + first = FALSE; + } +} + +extern void installLanguageMapDefault (const langType language) +{ + parserDefinition* lang; + Assert (0 <= language && language < (int) LanguageCount); + lang = LanguageTable [language]; + if (lang->currentPatterns != NULL) + stringListDelete (lang->currentPatterns); + if (lang->currentExtensions != NULL) + stringListDelete (lang->currentExtensions); + + if (lang->patterns == NULL) + lang->currentPatterns = stringListNew (); + else + { + lang->currentPatterns = + stringListNewFromArgv (lang->patterns); + } + if (lang->extensions == NULL) + lang->currentExtensions = stringListNew (); + else + { + lang->currentExtensions = + stringListNewFromArgv (lang->extensions); + } + if (Option.verbose) + printLanguageMap (language); + verbose ("\n"); +} + +extern void installLanguageMapDefaults (void) +{ + unsigned int i; + for (i = 0 ; i < LanguageCount ; ++i) + { + verbose (" %s: ", getLanguageName (i)); + installLanguageMapDefault (i); + } +} + +extern void clearLanguageMap (const langType language) +{ + Assert (0 <= language && language < (int) LanguageCount); + stringListClear (LanguageTable [language]->currentPatterns); + stringListClear (LanguageTable [language]->currentExtensions); +} + +extern void addLanguagePatternMap (const langType language, const char* ptrn) +{ + vString* const str = vStringNewInit (ptrn); + parserDefinition* lang; + Assert (0 <= language && language < (int) LanguageCount); + lang = LanguageTable [language]; + if (lang->currentPatterns == NULL) + lang->currentPatterns = stringListNew (); + stringListAdd (lang->currentPatterns, str); +} + +extern boolean removeLanguageExtensionMap (const char *const extension) +{ + boolean result = FALSE; + unsigned int i; + for (i = 0 ; i < LanguageCount && ! 
result ; ++i) + { + stringList* const exts = LanguageTable [i]->currentExtensions; + if (exts != NULL && stringListRemoveExtension (exts, extension)) + { + verbose (" (removed from %s)", getLanguageName (i)); + result = TRUE; + } + } + return result; +} + +extern void addLanguageExtensionMap ( + const langType language, const char* extension) +{ + vString* const str = vStringNewInit (extension); + Assert (0 <= language && language < (int) LanguageCount); + removeLanguageExtensionMap (extension); + stringListAdd (LanguageTable [language]->currentExtensions, str); +} + +extern void enableLanguage (const langType language, const boolean state) +{ + Assert (0 <= language && language < (int) LanguageCount); + LanguageTable [language]->enabled = state; +} + +extern void enableLanguages (const boolean state) +{ + unsigned int i; + for (i = 0 ; i < LanguageCount ; ++i) + enableLanguage (i, state); +} + +static void initializeParsers (void) +{ + unsigned int i; + for (i = 0 ; i < LanguageCount ; ++i) + if (LanguageTable [i]->initialize != NULL) + (LanguageTable [i]->initialize) ((langType) i); +} + +extern void initializeParsing (void) +{ + unsigned int builtInCount; + unsigned int i; + + builtInCount = sizeof (BuiltInParsers) / sizeof (BuiltInParsers [0]); + LanguageTable = xMalloc (builtInCount, parserDefinition*); + + verbose ("Installing parsers: "); + for (i = 0 ; i < builtInCount ; ++i) + { + parserDefinition* const def = (*BuiltInParsers [i]) (); + if (def != NULL) + { + boolean accepted = FALSE; + if (def->name == NULL || def->name[0] == '\0') + error (FATAL, "parser definition must contain name\n"); + else if (def->regex) + { + def->parser = findRegexTags; + accepted = TRUE; + } + else if ((def->parser == NULL) == (def->parser2 == NULL)) + error (FATAL, + "%s parser definition must define one and only one parsing routine\n", + def->name); + else + accepted = TRUE; + if (accepted) + { + verbose ("%s%s", i > 0 ? 
", " : "", def->name); + def->id = LanguageCount++; + LanguageTable [def->id] = def; + } + } + } + verbose ("\n"); + enableLanguages (TRUE); + initializeParsers (); +} + +extern void freeParserResources (void) +{ + unsigned int i; + for (i = 0 ; i < LanguageCount ; ++i) + { + parserDefinition* const lang = LanguageTable [i]; + freeList (&lang->currentPatterns); + freeList (&lang->currentExtensions); + eFree (lang->name); + lang->name = NULL; + eFree (lang); + } + if (LanguageTable != NULL) + eFree (LanguageTable); + LanguageTable = NULL; + LanguageCount = 0; +} + +/* +* Option parsing +*/ + +extern void processLanguageDefineOption ( + const char *const option, const char *const parameter __unused) +{ + if (parameter [0] == '\0') + error (WARNING, "No language specified for \"%s\" option", option); + else if (getNamedLanguage (parameter) != LANG_IGNORE) + error (WARNING, "Language \"%s\" already defined", parameter); + else + { + unsigned int i = LanguageCount++; + parserDefinition* const def = parserNew (parameter); + def->parser = findRegexTags; + def->currentPatterns = stringListNew (); + def->currentExtensions = stringListNew (); + def->regex = TRUE; + def->enabled = TRUE; + def->id = i; + LanguageTable = xRealloc (LanguageTable, i + 1, parserDefinition*); + LanguageTable [i] = def; + } +} + +static kindOption *langKindOption (const langType language, const int flag) +{ + unsigned int i; + kindOption* result = NULL; + const parserDefinition* lang; + Assert (0 <= language && language < (int) LanguageCount); + lang = LanguageTable [language]; + for (i=0 ; i < lang->kindCount && result == NULL ; ++i) + if (lang->kinds [i].letter == flag) + result = &lang->kinds [i]; + return result; +} + +static void disableLanguageKinds (const langType language) +{ + const parserDefinition* lang; + Assert (0 <= language && language < (int) LanguageCount); + lang = LanguageTable [language]; + if (lang->regex) + disableRegexKinds (language); + else + { + unsigned int i; + for (i = 0 
; i < lang->kindCount ; ++i) + lang->kinds [i].enabled = FALSE; + } +} + +static boolean enableLanguageKind ( + const langType language, const int kind, const boolean mode) +{ + boolean result = FALSE; + if (LanguageTable [language]->regex) + result = enableRegexKind (language, kind, mode); + else + { + kindOption* const opt = langKindOption (language, kind); + if (opt != NULL) + { + opt->enabled = mode; + result = TRUE; + } + } + return result; +} + +static void processLangKindOption ( + const langType language, const char *const option, + const char *const parameter) +{ + const char *p = parameter; + boolean mode = TRUE; + int c; + + Assert (0 <= language && language < (int) LanguageCount); + if (*p != '+' && *p != '-') + disableLanguageKinds (language); + while ((c = *p++) != '\0') switch (c) + { + case '+': mode = TRUE; break; + case '-': mode = FALSE; break; + default: + if (! enableLanguageKind (language, c, mode)) + error (WARNING, "Unsupported parameter '%c' for --%s option", + c, option); + break; + } +} + +extern boolean processKindOption ( + const char *const option, const char *const parameter) +{ + boolean handled = FALSE; + const char* const dash = strchr (option, '-'); + if (dash != NULL && + (strcmp (dash + 1, "kinds") == 0 || strcmp (dash + 1, "types") == 0)) + { + langType language; + vString* langName = vStringNew (); + vStringNCopyS (langName, option, dash - option); + language = getNamedLanguage (vStringValue (langName)); + if (language == LANG_IGNORE) + error (WARNING, "Unknown language \"%s\" in \"%s\" option", vStringValue (langName), option); + else + processLangKindOption (language, option, parameter); + vStringDelete (langName); + handled = TRUE; + } + return handled; +} + +static void printLanguageKind (const kindOption* const kind, boolean indent) +{ + const char *const indentation = indent ? " " : ""; + printf ("%s%c %s%s\n", indentation, kind->letter, + kind->description != NULL ? kind->description : + (kind->name != NULL ? 
kind->name : ""), + kind->enabled ? "" : " [off]"); +} + +static void printKinds (langType language, boolean indent) +{ + const parserDefinition* lang; + Assert (0 <= language && language < (int) LanguageCount); + lang = LanguageTable [language]; + if (lang->kinds != NULL || lang->regex) + { + unsigned int i; + for (i = 0 ; i < lang->kindCount ; ++i) + printLanguageKind (lang->kinds + i, indent); + printRegexKinds (language, indent); + } +} + +extern void printLanguageKinds (const langType language) +{ + if (language == LANG_AUTO) + { + unsigned int i; + for (i = 0 ; i < LanguageCount ; ++i) + { + const parserDefinition* const lang = LanguageTable [i]; + printf ("%s%s\n", lang->name, lang->enabled ? "" : " [disabled]"); + printKinds (i, TRUE); + } + } + else + printKinds (language, FALSE); +} + +static void printMaps (const langType language) +{ + const parserDefinition* lang; + unsigned int i; + Assert (0 <= language && language < (int) LanguageCount); + lang = LanguageTable [language]; + printf ("%-8s", lang->name); + if (lang->currentExtensions != NULL) + for (i = 0 ; i < stringListCount (lang->currentExtensions) ; ++i) + printf (" *.%s", vStringValue ( + stringListItem (lang->currentExtensions, i))); + if (lang->currentPatterns != NULL) + for (i = 0 ; i < stringListCount (lang->currentPatterns) ; ++i) + printf (" %s", vStringValue ( + stringListItem (lang->currentPatterns, i))); + putchar ('\n'); +} + +extern void printLanguageMaps (const langType language) +{ + if (language == LANG_AUTO) + { + unsigned int i; + for (i = 0 ; i < LanguageCount ; ++i) + printMaps (i); + } + else + printMaps (language); +} + +static void printLanguage (const langType language) +{ + const parserDefinition* lang; + Assert (0 <= language && language < (int) LanguageCount); + lang = LanguageTable [language]; + if (lang->kinds != NULL || lang->regex) + printf ("%s%s\n", lang->name, lang->enabled ? 
"" : " [disabled]"); +} + +extern void printLanguageList (void) +{ + unsigned int i; + for (i = 0 ; i < LanguageCount ; ++i) + printLanguage (i); +} + +/* +* File parsing +*/ + +static void makeFileTag (const char *const fileName) +{ + if (Option.include.fileNames) + { + tagEntryInfo tag; + initTagEntry (&tag, baseFilename (fileName)); + + tag.isFileEntry = TRUE; + tag.lineNumberEntry = TRUE; + tag.lineNumber = 1; + tag.kindName = "file"; + tag.kind = 'F'; + + makeTagEntry (&tag); + } +} + +static boolean createTagsForFile ( + const char *const fileName, const langType language, + const unsigned int passCount) +{ + boolean retried = FALSE; + Assert (0 <= language && language < (int) LanguageCount); + if (fileOpen (fileName, language)) + { + const parserDefinition* const lang = LanguageTable [language]; + if (Option.etags) + beginEtagsFile (); + + makeFileTag (fileName); + + if (lang->parser != NULL) + lang->parser (); + else if (lang->parser2 != NULL) + retried = lang->parser2 (passCount); + + if (Option.etags) + endEtagsFile (getSourceFileTagPath ()); + + fileClose (); + } + + return retried; +} + +static boolean createTagsWithFallback ( + const char *const fileName, const langType language) +{ + const unsigned long numTags = TagFile.numTags.added; + fpos_t tagFilePosition; + unsigned int passCount = 0; + boolean tagFileResized = FALSE; + + fgetpos (TagFile.fp, &tagFilePosition); + while (createTagsForFile (fileName, language, ++passCount)) + { + /* Restore prior state of tag file. + */ + fsetpos (TagFile.fp, &tagFilePosition); + TagFile.numTags.added = numTags; + tagFileResized = TRUE; + } + return tagFileResized; +} + +extern boolean parseFile (const char *const fileName) +{ + boolean tagFileResized = FALSE; + langType language = Option.language; + if (Option.language == LANG_AUTO) + language = getFileLanguage (fileName); + Assert (language != LANG_AUTO); + if (language == LANG_IGNORE) + verbose ("ignoring %s (unknown language)\n", fileName); + else if (! 
LanguageTable [language]->enabled) + verbose ("ignoring %s (language disabled)\n", fileName); + else + { + if (Option.filter) + openTagFile (); + + tagFileResized = createTagsWithFallback (fileName, language); + + if (Option.filter) + closeTagFile (tagFileResized); + addTotals (1, 0L, 0L); + + return tagFileResized; + } + return tagFileResized; +} + +/* vi:set tabstop=4 shiftwidth=4 nowrap: */ diff --git a/third_party/ctags/parse.h b/third_party/ctags/parse.h new file mode 100644 index 000000000..38f4edc9e --- /dev/null +++ b/third_party/ctags/parse.h @@ -0,0 +1,130 @@ +// clang-format off +/* +* $Id: parse.h 443 2006-05-30 04:37:13Z darren $ +* +* Copyright (c) 1998-2003, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* Private definitions for parsing support. +*/ +#ifndef _PARSE_H +#define _PARSE_H + +/* +* INCLUDE FILES +*/ +#include "third_party/ctags/general.h" /* must always come first */ +#include "third_party/ctags/parsers.h" /* contains list of parsers */ +#include "third_party/ctags/strlist.h" + +/* +* MACROS +*/ +#define KIND_COUNT(kindTable) (sizeof(kindTable)/sizeof(kindOption)) + +#define LANG_AUTO (-1) +#define LANG_IGNORE (-2) + +/* +* DATA DECLARATIONS +*/ +typedef int langType; + +typedef void (*createRegexTag) (const vString* const name); +typedef void (*simpleParser) (void); +typedef boolean (*rescanParser) (const unsigned int passCount); +typedef void (*parserInitialize) (langType language); + +typedef struct sKindOption { + boolean enabled; /* are tags for kind enabled? 
/*
 * Description of one language parser.
 *
 * The first group of fields is filled in by the parser's definition
 * function; the second group is bookkeeping maintained by the parsing
 * framework.  NOTE(review): `extensions' and `patterns' appear to be
 * NULL-terminated string arrays (they are handed to stringListNewFromArgv)
 * -- confirm against strlist.c.
 */
typedef struct {
    /* defined by parser */
    char* name;                    /* name of language */
    kindOption* kinds;             /* tag kinds handled by parser */
    unsigned int kindCount;        /* size of `kinds' list */
    const char *const *extensions; /* list of default extensions */
    const char *const *patterns;   /* list of default file name patterns */
    parserInitialize initialize;   /* initialization routine, if needed */
    simpleParser parser;           /* simple parser (common case) */
    rescanParser parser2;          /* rescanning parser (unusual case) */
    boolean regex;                 /* is this a regex parser? */

    /* used internally */
    unsigned int id;               /* id assigned to language */
    boolean enabled;               /* currently enabled? */
    stringList* currentPatterns;   /* current list of file name patterns */
    stringList* currentExtensions; /* current list of extensions */
} parserDefinition;
+ */ +extern parserDefinitionFunc PARSER_LIST; + +/* Legacy interface */ +extern boolean includingDefineTags (void); + +/* Language processing and parsing */ +extern void makeSimpleTag (const vString* const name, kindOption* const kinds, const int kind); +extern parserDefinition* parserNew (const char* name); +extern const char *getLanguageName (const langType language); +extern langType getNamedLanguage (const char *const name); +extern langType getFileLanguage (const char *const fileName); +extern void installLanguageMapDefault (const langType language); +extern void installLanguageMapDefaults (void); +extern void clearLanguageMap (const langType language); +extern boolean removeLanguageExtensionMap (const char *const extension); +extern void addLanguageExtensionMap (const langType language, const char* extension); +extern void addLanguagePatternMap (const langType language, const char* ptrn); +extern void printLanguageMap (const langType language); +extern void printLanguageMaps (const langType language); +extern void enableLanguages (const boolean state); +extern void enableLanguage (const langType language, const boolean state); +extern void initializeParsing (void); +extern void freeParserResources (void); +extern void processLanguageDefineOption (const char *const option, const char *const parameter); +extern boolean processKindOption (const char *const option, const char *const parameter); +extern void printKindOptions (void); +extern void printLanguageKinds (const langType language); +extern void printLanguageList (void); +extern boolean parseFile (const char *const fileName); + +/* Regex interface */ +#ifdef HAVE_REGEX +extern void findRegexTags (void); +extern boolean matchRegex (const vString* const line, const langType language); +#endif +extern boolean processRegexOption (const char *const option, const char *const parameter); +extern void addLanguageRegex (const langType language, const char* const regex); +extern void addTagRegex (const langType 
language, const char* const regex, const char* const name, const char* const kinds, const char* const flags); +extern void addCallbackRegex (const langType language, const char *const regex, const char *const flags, const regexCallback callback); +extern void disableRegexKinds (const langType language); +extern boolean enableRegexKind (const langType language, const int kind, const boolean mode); +extern void printRegexKinds (const langType language, boolean indent); +extern void freeRegexResources (void); +extern void checkRegex (void); + +#endif /* _PARSE_H */ + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/third_party/ctags/parsers.h b/third_party/ctags/parsers.h new file mode 100644 index 000000000..650117211 --- /dev/null +++ b/third_party/ctags/parsers.h @@ -0,0 +1,66 @@ +// clang-format off +/* +* $Id: parsers.h 771 2010-11-30 13:15:12Z vberthoux $ +* +* Copyright (c) 2000-2003, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* External interface to all language parsing modules. +* +* To add a new language parser, you need only modify this single source +* file to add the name of the parser definition function. 
+*/ +#ifndef _PARSERS_H +#define _PARSERS_H + +/* Add the name of any new parser definition function here */ +#define PARSER_LIST \ + AntParser, \ + AsmParser, \ + AspParser, \ + AwkParser, \ + BasicParser, \ + BetaParser, \ + CParser, \ + CppParser, \ + CsharpParser, \ + CobolParser, \ + DosBatchParser, \ + EiffelParser, \ + ErlangParser, \ + FlexParser, \ + FortranParser, \ + GoParser, \ + HtmlParser, \ + JavaParser, \ + JavaScriptParser, \ + LispParser, \ + LuaParser, \ + MakefileParser, \ + MatLabParser, \ + ObjcParser , \ + OcamlParser, \ + PascalParser, \ + PerlParser, \ + PhpParser, \ + PythonParser, \ + RexxParser, \ + RubyParser, \ + SchemeParser, \ + ShParser, \ + SlangParser, \ + SmlParser, \ + SqlParser, \ + TclParser, \ + TexParser, \ + VeraParser, \ + VerilogParser, \ + VhdlParser, \ + VimParser, \ + YaccParser + +#endif /* _PARSERS_H */ + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/third_party/ctags/pascal.c b/third_party/ctags/pascal.c new file mode 100644 index 000000000..c1eed45d6 --- /dev/null +++ b/third_party/ctags/pascal.c @@ -0,0 +1,269 @@ +// clang-format off +/* +* $Id: pascal.c 536 2007-06-02 06:09:00Z elliotth $ +* +* Copyright (c) 2001-2002, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains functions for generating tags for the Pascal language, +* including some extensions for Object Pascal. 
+*/ + +/* +* INCLUDE FILES +*/ +#include "third_party/ctags/general.h" /* must always come first */ + +#include "libc/mem/alg.h" +#include "libc/str/str.h" + +#include "third_party/ctags/entry.h" +#include "third_party/ctags/parse.h" +#include "third_party/ctags/read.h" +#include "third_party/ctags/vstring.h" + +/* +* DATA DEFINITIONS +*/ +typedef enum { + K_FUNCTION, K_PROCEDURE +} pascalKind; + +static kindOption PascalKinds [] = { + { TRUE, 'f', "function", "functions"}, + { TRUE, 'p', "procedure", "procedures"} +}; + +/* +* FUNCTION DEFINITIONS +*/ + +static void createPascalTag ( + tagEntryInfo* const tag, const vString* const name, const int kind) +{ + if (PascalKinds [kind].enabled && name != NULL && vStringLength (name) > 0) + { + initTagEntry (tag, vStringValue (name)); + tag->kindName = PascalKinds [kind].name; + tag->kind = PascalKinds [kind].letter; + } + else + initTagEntry (tag, NULL); +} + +static void makePascalTag (const tagEntryInfo* const tag) +{ + if (tag->name != NULL) + makeTagEntry (tag); +} + +static const unsigned char* dbp; + +#define starttoken(c) (isalpha ((int) c) || (int) c == '_') +#define intoken(c) (isalnum ((int) c) || (int) c == '_' || (int) c == '.') +#define endtoken(c) (! intoken (c) && ! isdigit ((int) c)) + +static boolean tail (const char *cp) +{ + boolean result = FALSE; + register int len = 0; + + while (*cp != '\0' && tolower ((int) *cp) == tolower ((int) dbp [len])) + cp++, len++; + if (*cp == '\0' && !intoken (dbp [len])) + { + dbp += len; + result = TRUE; + } + return result; +} + +/* Algorithm adapted from from GNU etags. + * Locates tags for procedures & functions. Doesn't do any type- or + * var-definitions. It does look for the keyword "extern" or "forward" + * immediately following the procedure statement; if found, the tag is + * skipped. 
+ */ +static void findPascalTags (void) +{ + vString *name = vStringNew (); + tagEntryInfo tag; + pascalKind kind = K_FUNCTION; + /* each of these flags is TRUE iff: */ + boolean incomment = FALSE; /* point is inside a comment */ + int comment_char = '\0'; /* type of current comment */ + boolean inquote = FALSE; /* point is inside '..' string */ + boolean get_tagname = FALSE;/* point is after PROCEDURE/FUNCTION + keyword, so next item = potential tag */ + boolean found_tag = FALSE; /* point is after a potential tag */ + boolean inparms = FALSE; /* point is within parameter-list */ + boolean verify_tag = FALSE; + /* point has passed the parm-list, so the next token will determine + * whether this is a FORWARD/EXTERN to be ignored, or whether it is a + * real tag + */ + + dbp = fileReadLine (); + while (dbp != NULL) + { + int c = *dbp++; + + if (c == '\0') /* if end of line */ + { + dbp = fileReadLine (); + if (dbp == NULL || *dbp == '\0') + continue; + if (!((found_tag && verify_tag) || get_tagname)) + c = *dbp++; + /* only if don't need *dbp pointing to the beginning of + * the name of the procedure or function + */ + } + if (incomment) + { + if (comment_char == '{' && c == '}') + incomment = FALSE; + else if (comment_char == '(' && c == '*' && *dbp == ')') + { + dbp++; + incomment = FALSE; + } + continue; + } + else if (inquote) + { + if (c == '\'') + inquote = FALSE; + continue; + } + else switch (c) + { + case '\'': + inquote = TRUE; /* found first quote */ + continue; + case '{': /* found open { comment */ + incomment = TRUE; + comment_char = c; + continue; + case '(': + if (*dbp == '*') /* found open (* comment */ + { + incomment = TRUE; + comment_char = c; + dbp++; + } + else if (found_tag) /* found '(' after tag, i.e., parm-list */ + inparms = TRUE; + continue; + case ')': /* end of parms list */ + if (inparms) + inparms = FALSE; + continue; + case ';': + if (found_tag && !inparms) /* end of proc or fn stmt */ + { + verify_tag = TRUE; + break; + } + 
continue; + } + if (found_tag && verify_tag && *dbp != ' ') + { + /* check if this is an "extern" declaration */ + if (*dbp == '\0') + continue; + if (tolower ((int) *dbp == 'e')) + { + if (tail ("extern")) /* superfluous, really! */ + { + found_tag = FALSE; + verify_tag = FALSE; + } + } + else if (tolower ((int) *dbp) == 'f') + { + if (tail ("forward")) /* check for forward reference */ + { + found_tag = FALSE; + verify_tag = FALSE; + } + } + if (found_tag && verify_tag) /* not external proc, so make tag */ + { + found_tag = FALSE; + verify_tag = FALSE; + makePascalTag (&tag); + continue; + } + } + if (get_tagname) /* grab name of proc or fn */ + { + const unsigned char *cp; + + if (*dbp == '\0') + continue; + + /* grab block name */ + while (isspace ((int) *dbp)) + ++dbp; + for (cp = dbp ; *cp != '\0' && !endtoken (*cp) ; cp++) + continue; + vStringNCopyS (name, (const char*) dbp, cp - dbp); + createPascalTag (&tag, name, kind); + dbp = cp; /* set dbp to e-o-token */ + get_tagname = FALSE; + found_tag = TRUE; + /* and proceed to check for "extern" */ + } + else if (!incomment && !inquote && !found_tag) + { + switch (tolower ((int) c)) + { + case 'c': + if (tail ("onstructor")) + { + get_tagname = TRUE; + kind = K_PROCEDURE; + } + break; + case 'd': + if (tail ("estructor")) + { + get_tagname = TRUE; + kind = K_PROCEDURE; + } + break; + case 'p': + if (tail ("rocedure")) + { + get_tagname = TRUE; + kind = K_PROCEDURE; + } + break; + case 'f': + if (tail ("unction")) + { + get_tagname = TRUE; + kind = K_FUNCTION; + } + break; + } + } /* while not eof */ + } + vStringDelete (name); +} + +extern parserDefinition* PascalParser (void) +{ + static const char *const extensions [] = { "p", "pas", NULL }; + parserDefinition* def = parserNew ("Pascal"); + def->extensions = extensions; + def->kinds = PascalKinds; + def->kindCount = KIND_COUNT (PascalKinds); + def->parser = findPascalTags; + return def; +} + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git 
a/third_party/ctags/perl.c b/third_party/ctags/perl.c new file mode 100644 index 000000000..e6d2007a3 --- /dev/null +++ b/third_party/ctags/perl.c @@ -0,0 +1,384 @@ +// clang-format off +/* +* $Id: perl.c 601 2007-08-02 04:45:16Z perlguy0 $ +* +* Copyright (c) 2000-2003, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains functions for generating tags for PERL language +* files. +*/ + +/* +* INCLUDE FILES +*/ +#include "third_party/ctags/general.h" /* must always come first */ + +#include "libc/mem/alg.h" +#include "libc/str/str.h" + +#include "third_party/ctags/entry.h" +#include "third_party/ctags/options.h" +#include "third_party/ctags/read.h" +#include "third_party/ctags/routines.h" +#include "third_party/ctags/vstring.h" + +#define TRACE_PERL_C 0 +#define TRACE if (TRACE_PERL_C) printf("perl.c:%d: ", __LINE__), printf + +/* +* DATA DEFINITIONS +*/ +typedef enum { + K_NONE = -1, + K_CONSTANT, + K_FORMAT, + K_LABEL, + K_PACKAGE, + K_SUBROUTINE, + K_SUBROUTINE_DECLARATION +} perlKind; + +static kindOption PerlKinds [] = { + { TRUE, 'c', "constant", "constants" }, + { TRUE, 'f', "format", "formats" }, + { TRUE, 'l', "label", "labels" }, + { TRUE, 'p', "package", "packages" }, + { TRUE, 's', "subroutine", "subroutines" }, + { FALSE, 'd', "subroutine declaration", "subroutine declarations" }, +}; + +/* +* FUNCTION DEFINITIONS +*/ + +static boolean isIdentifier1 (int c) +{ + return (boolean) (isalpha (c) || c == '_'); +} + +static boolean isIdentifier (int c) +{ + return (boolean) (isalnum (c) || c == '_'); +} + +static boolean isPodWord (const char *word) +{ + boolean result = FALSE; + if (isalpha (*word)) + { + const char *const pods [] = { + "head1", "head2", "head3", "head4", "over", "item", "back", + "pod", "begin", "end", "for" + }; + const size_t count = sizeof (pods) / sizeof (pods [0]); + const char *white = strpbrk (word, " \t"); + const size_t len = 
(white!=NULL) ? (size_t)(white-word) : strlen (word); + char *const id = (char*) eMalloc (len + 1); + size_t i; + strncpy (id, word, len); + id [len] = '\0'; + for (i = 0 ; i < count && ! result ; ++i) + { + if (strcmp (id, pods [i]) == 0) + result = TRUE; + } + eFree (id); + } + return result; +} + +/* + * Perl subroutine declaration may look like one of the following: + * + * sub abc; + * sub abc :attr; + * sub abc (proto); + * sub abc (proto) :attr; + * + * Note that there may be more than one attribute. Attributes may + * have things in parentheses (they look like arguments). Anything + * inside of those parentheses goes. Prototypes may contain semi-colons. + * The matching end when we encounter (outside of any parentheses) either + * a semi-colon (that'd be a declaration) or an left curly brace + * (definition). + * + * This is pretty complicated parsing (plus we all know that only perl can + * parse Perl), so we are only promising best effort here. + * + * If we can't determine what this is (due to a file ending, for example), + * we will return FALSE. + */ +static boolean isSubroutineDeclaration (const unsigned char *cp) +{ + boolean attr = FALSE; + int nparens = 0; + + do { + for ( ; *cp; ++cp) { +SUB_DECL_SWITCH: + switch (*cp) { + case ':': + if (nparens) + break; + else if (TRUE == attr) + return FALSE; /* Invalid attribute name */ + else + attr = TRUE; + break; + case '(': + ++nparens; + break; + case ')': + --nparens; + break; + case ' ': + case '\t': + break; + case ';': + if (!nparens) + return TRUE; + case '{': + if (!nparens) + return FALSE; + default: + if (attr) { + if (isIdentifier1(*cp)) { + cp++; + while (isIdentifier (*cp)) + cp++; + attr = FALSE; + goto SUB_DECL_SWITCH; /* Instead of --cp; */ + } else { + return FALSE; + } + } else if (nparens) { + break; + } else { + return FALSE; + } + } + } + } while (NULL != (cp = fileReadLine ())); + + return FALSE; +} + +/* Algorithm adapted from from GNU etags. 
+ * Perl support by Bart Robinson + * Perl sub names: look for /^ [ \t\n]sub [ \t\n]+ [^ \t\n{ (]+/ + */ +static void findPerlTags (void) +{ + vString *name = vStringNew (); + vString *package = NULL; + boolean skipPodDoc = FALSE; + const unsigned char *line; + + while ((line = fileReadLine ()) != NULL) + { + boolean spaceRequired = FALSE; + boolean qualified = FALSE; + const unsigned char *cp = line; + perlKind kind = K_NONE; + tagEntryInfo e; + + if (skipPodDoc) + { + if (strncmp ((const char*) line, "=cut", (size_t) 4) == 0) + skipPodDoc = FALSE; + continue; + } + else if (line [0] == '=') + { + skipPodDoc = isPodWord ((const char*)line + 1); + continue; + } + else if (strcmp ((const char*) line, "__DATA__") == 0) + break; + else if (strcmp ((const char*) line, "__END__") == 0) + break; + else if (line [0] == '#') + continue; + + while (isspace (*cp)) + cp++; + + if (strncmp((const char*) cp, "sub", (size_t) 3) == 0) + { + TRACE("this looks like a sub\n"); + cp += 3; + kind = K_SUBROUTINE; + spaceRequired = TRUE; + qualified = TRUE; + } + else if (strncmp((const char*) cp, "use", (size_t) 3) == 0) + { + cp += 3; + if (!isspace(*cp)) + continue; + while (*cp && isspace (*cp)) + ++cp; + if (strncmp((const char*) cp, "constant", (size_t) 8) != 0) + continue; + cp += 8; + kind = K_CONSTANT; + spaceRequired = TRUE; + qualified = TRUE; + } + else if (strncmp((const char*) cp, "package", (size_t) 7) == 0) + { + /* This will point to space after 'package' so that a tag + can be made */ + const unsigned char *space = cp += 7; + + if (package == NULL) + package = vStringNew (); + else + vStringClear (package); + while (isspace (*cp)) + cp++; + while ((int) *cp != ';' && !isspace ((int) *cp)) + { + vStringPut (package, (int) *cp); + cp++; + } + vStringCatS (package, "::"); + + cp = space; /* Rewind */ + kind = K_PACKAGE; + spaceRequired = TRUE; + qualified = TRUE; + } + else if (strncmp((const char*) cp, "format", (size_t) 6) == 0) + { + cp += 6; + kind = K_FORMAT; + 
spaceRequired = TRUE; + qualified = TRUE; + } + else + { + if (isIdentifier1 (*cp)) + { + const unsigned char *p = cp; + while (isIdentifier (*p)) + ++p; + while (isspace (*p)) + ++p; + if ((int) *p == ':' && (int) *(p + 1) != ':') + kind = K_LABEL; + } + } + if (kind != K_NONE) + { + TRACE("cp0: %s\n", (const char *) cp); + if (spaceRequired && *cp && !isspace (*cp)) + continue; + + TRACE("cp1: %s\n", (const char *) cp); + while (isspace (*cp)) + cp++; + + while (!*cp || '#' == *cp) { /* Gobble up empty lines + and comments */ + cp = fileReadLine (); + if (!cp) + goto END_MAIN_WHILE; + while (isspace (*cp)) + cp++; + } + + while (isIdentifier (*cp) || (K_PACKAGE == kind && ':' == *cp)) + { + vStringPut (name, (int) *cp); + cp++; + } + + if (K_FORMAT == kind && + vStringLength (name) == 0 && /* cp did not advance */ + '=' == *cp) + { + /* format's name is optional. If it's omitted, 'STDOUT' + is assumed. */ + vStringCatS (name, "STDOUT"); + } + + vStringTerminate (name); + TRACE("name: %s\n", name->buffer); + + if (0 == vStringLength(name)) { + vStringClear(name); + continue; + } + + if (K_SUBROUTINE == kind) + { + /* + * isSubroutineDeclaration() may consume several lines. So + * we record line positions. 
+ */ + initTagEntry(&e, vStringValue(name)); + + if (TRUE == isSubroutineDeclaration(cp)) { + if (TRUE == PerlKinds[K_SUBROUTINE_DECLARATION].enabled) { + kind = K_SUBROUTINE_DECLARATION; + } else { + vStringClear (name); + continue; + } + } + + e.kind = PerlKinds[kind].letter; + e.kindName = PerlKinds[kind].name; + + makeTagEntry(&e); + + if (Option.include.qualifiedTags && qualified && + package != NULL && vStringLength (package) > 0) + { + vString *const qualifiedName = vStringNew (); + vStringCopy (qualifiedName, package); + vStringCat (qualifiedName, name); + e.name = vStringValue(qualifiedName); + makeTagEntry(&e); + vStringDelete (qualifiedName); + } + } else if (vStringLength (name) > 0) + { + makeSimpleTag (name, PerlKinds, kind); + if (Option.include.qualifiedTags && qualified && + K_PACKAGE != kind && + package != NULL && vStringLength (package) > 0) + { + vString *const qualifiedName = vStringNew (); + vStringCopy (qualifiedName, package); + vStringCat (qualifiedName, name); + makeSimpleTag (qualifiedName, PerlKinds, kind); + vStringDelete (qualifiedName); + } + } + vStringClear (name); + } + } + +END_MAIN_WHILE: + vStringDelete (name); + if (package != NULL) + vStringDelete (package); +} + +extern parserDefinition* PerlParser (void) +{ + static const char *const extensions [] = { "pl", "pm", "plx", "perl", NULL }; + parserDefinition* def = parserNew ("Perl"); + def->kinds = PerlKinds; + def->kindCount = KIND_COUNT (PerlKinds); + def->extensions = extensions; + def->parser = findPerlTags; + return def; +} + +/* vi:set tabstop=4 shiftwidth=4 noexpandtab: */ diff --git a/third_party/ctags/php.c b/third_party/ctags/php.c new file mode 100644 index 000000000..231a5ac45 --- /dev/null +++ b/third_party/ctags/php.c @@ -0,0 +1,226 @@ +// clang-format off +/* +* $Id: php.c 734 2009-08-20 23:33:54Z jafl $ +* +* Copyright (c) 2000, Jesus Castagnetto +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. 
+* +* This module contains functions for generating tags for the PHP web page +* scripting language. Only recognizes functions and classes, not methods or +* variables. +* +* Parsing PHP defines by Pavel Hlousek , Apr 2003. +*/ + +/* +* INCLUDE FILES +*/ +#include "third_party/ctags/general.h" /* must always come first */ + +#include "libc/mem/alg.h" +#include "libc/str/str.h" + +#include "third_party/ctags/parse.h" +#include "third_party/ctags/read.h" +#include "third_party/ctags/vstring.h" + +/* +* DATA DEFINITIONS +*/ +typedef enum { + K_CLASS, K_DEFINE, K_FUNCTION, K_VARIABLE +} phpKind; + +#if 0 +static kindOption PhpKinds [] = { + { TRUE, 'c', "class", "classes" }, + { TRUE, 'd', "define", "constant definitions" }, + { TRUE, 'f', "function", "functions" }, + { TRUE, 'v', "variable", "variables" } +}; +#endif + +/* +* FUNCTION DEFINITIONS +*/ + +/* JavaScript patterns are duplicated in jscript.c */ + +#define ALPHA "[:alpha:]" +#define ALNUM "[:alnum:]" + +static void installPHPRegex (const langType language) +{ + addTagRegex(language, "^[ \t]*((final|abstract)[ \t]+)*class[ \t]+([" ALPHA "_][" ALNUM "_]*)", + "\\3", "c,class,classes", NULL); + addTagRegex(language, "^[ \t]*interface[ \t]+([" ALPHA "_][" ALNUM "_]*)", + "\\1", "i,interface,interfaces", NULL); + addTagRegex(language, "^[ \t]*define[ \t]*\\([ \t]*['\"]?([" ALPHA "_][" ALNUM "_]*)", + "\\1", "d,define,constant definitions", NULL); + addTagRegex(language, "^[ \t]*((static|public|protected|private)[ \t]+)*function[ \t]+&?[ \t]*([" ALPHA "_][" ALNUM "_]*)", + "\\3", "f,function,functions", NULL); + addTagRegex(language, "^[ \t]*(\\$|::\\$|\\$this->)([" ALPHA "_][" ALNUM "_]*)[ \t]*=", + "\\2", "v,variable,variables", NULL); + addTagRegex(language, "^[ \t]*((var|public|protected|private|static)[ \t]+)+\\$([" ALPHA "_][" ALNUM "_]*)[ \t]*[=;]", + "\\3", "v,variable,variables", NULL); + + /* function regex is covered by PHP regex */ + addTagRegex (language, "(^|[ \t])([A-Za-z0-9_]+)[ \t]*[=:][ 
\t]*function[ \t]*\\(", + "\\2", "j,jsfunction,javascript functions", NULL); + addTagRegex (language, "(^|[ \t])([A-Za-z0-9_.]+)\\.([A-Za-z0-9_]+)[ \t]*=[ \t]*function[ \t]*\\(", + "\\2.\\3", "j,jsfunction,javascript functions", NULL); + addTagRegex (language, "(^|[ \t])([A-Za-z0-9_.]+)\\.([A-Za-z0-9_]+)[ \t]*=[ \t]*function[ \t]*\\(", + "\\3", "j,jsfunction,javascript functions", NULL); +} + +/* Create parser definition structure */ +extern parserDefinition* PhpParser (void) +{ + static const char *const extensions [] = { "php", "php3", "phtml", NULL }; + parserDefinition* def = parserNew ("PHP"); + def->extensions = extensions; + def->initialize = installPHPRegex; + def->regex = TRUE; + return def; +} + +#if 0 + +static boolean isLetter(const int c) +{ + return (boolean)(isalpha(c) || (c >= 127 && c <= 255)); +} + +static boolean isVarChar1(const int c) +{ + return (boolean)(isLetter (c) || c == '_'); +} + +static boolean isVarChar(const int c) +{ + return (boolean)(isVarChar1 (c) || isdigit (c)); +} + +static void findPhpTags (void) +{ + vString *name = vStringNew (); + const unsigned char *line; + + while ((line = fileReadLine ()) != NULL) + { + const unsigned char *cp = line; + const char* f; + + while (isspace (*cp)) + cp++; + + if (*(const char*)cp == '$' && isVarChar1 (*(const char*)(cp+1))) + { + cp += 1; + vStringClear (name); + while (isVarChar ((int) *cp)) + { + vStringPut (name, (int) *cp); + ++cp; + } + while (isspace ((int) *cp)) + ++cp; + if (*(const char*) cp == '=') + { + vStringTerminate (name); + makeSimpleTag (name, PhpKinds, K_VARIABLE); + vStringClear (name); + } + } + else if ((f = strstr ((const char*) cp, "function")) != NULL && + (f == (const char*) cp || isspace ((int) f [-1])) && + isspace ((int) f [8])) + { + cp = ((const unsigned char *) f) + 8; + + while (isspace ((int) *cp)) + ++cp; + + if (*cp == '&') /* skip reference character and following whitespace */ + { + cp++; + + while (isspace ((int) *cp)) + ++cp; + } + + vStringClear 
(name); + while (isalnum ((int) *cp) || *cp == '_') + { + vStringPut (name, (int) *cp); + ++cp; + } + vStringTerminate (name); + makeSimpleTag (name, PhpKinds, K_FUNCTION); + vStringClear (name); + } + else if (strncmp ((const char*) cp, "class", (size_t) 5) == 0 && + isspace ((int) cp [5])) + { + cp += 5; + + while (isspace ((int) *cp)) + ++cp; + vStringClear (name); + while (isalnum ((int) *cp) || *cp == '_') + { + vStringPut (name, (int) *cp); + ++cp; + } + vStringTerminate (name); + makeSimpleTag (name, PhpKinds, K_CLASS); + vStringClear (name); + } + else if (strncmp ((const char*) cp, "define", (size_t) 6) == 0 && + ! isalnum ((int) cp [6])) + { + cp += 6; + + while (isspace ((int) *cp)) + ++cp; + if (*cp != '(') + continue; + ++cp; + + while (isspace ((int) *cp)) + ++cp; + if ((*cp == '\'') || (*cp == '"')) + ++cp; + else if (! ((*cp == '_') || isalnum ((int) *cp))) + continue; + + vStringClear (name); + while (isalnum ((int) *cp) || *cp == '_') + { + vStringPut (name, (int) *cp); + ++cp; + } + vStringTerminate (name); + makeSimpleTag (name, PhpKinds, K_DEFINE); + vStringClear (name); + } + } + vStringDelete (name); +} + +extern parserDefinition* PhpParser (void) +{ + static const char *const extensions [] = { "php", "php3", "phtml", NULL }; + parserDefinition* def = parserNew ("PHP"); + def->kinds = PhpKinds; + def->kindCount = KIND_COUNT (PhpKinds); + def->extensions = extensions; + def->parser = findPhpTags; + return def; +} + +#endif + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/third_party/ctags/python.c b/third_party/ctags/python.c new file mode 100644 index 000000000..b35cb688f --- /dev/null +++ b/third_party/ctags/python.c @@ -0,0 +1,776 @@ +// clang-format off +/* +* $Id: python.c 752 2010-02-27 17:52:46Z elliotth $ +* +* Copyright (c) 2000-2003, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. 
+* +* This module contains functions for generating tags for Python language +* files. +*/ +/* +* INCLUDE FILES +*/ +#include "libc/mem/mem.h" +#include "third_party/ctags/general.h" /* must always come first */ + +#include "libc/mem/alg.h" +#include "libc/str/str.h" + +#include "third_party/ctags/entry.h" +#include "third_party/ctags/options.h" +#include "third_party/ctags/read.h" +#include "third_party/ctags/main.h" +#include "third_party/ctags/vstring.h" +#include "third_party/ctags/routines.h" +#include "third_party/ctags/debug.h" + +/* +* DATA DECLARATIONS +*/ +typedef struct NestingLevel NestingLevel; +typedef struct NestingLevels NestingLevels; + +struct NestingLevel +{ + int indentation; + vString *name; + int type; +}; + +struct NestingLevels +{ + NestingLevel *levels; + int n; /* number of levels in use */ + int allocated; +}; + +typedef enum { + K_CLASS, K_FUNCTION, K_MEMBER, K_VARIABLE, K_IMPORT +} pythonKind; + +/* +* DATA DEFINITIONS +*/ +static kindOption PythonKinds[] = { + {TRUE, 'c', "class", "classes"}, + {TRUE, 'f', "function", "functions"}, + {TRUE, 'm', "member", "class members"}, + {TRUE, 'v', "variable", "variables"}, + {FALSE, 'i', "namespace", "imports"} +}; + +static char const * const singletriple = "'''"; +static char const * const doubletriple = "\"\"\""; + +/* +* FUNCTION DEFINITIONS +*/ + +static NestingLevels *nestingLevelsNew (void) +{ + NestingLevels *nls = xCalloc (1, NestingLevels); + return nls; +} + +static void nestingLevelsFree (NestingLevels *nls) +{ + int i; + for (i = 0; i < nls->allocated; i++) + vStringDelete(nls->levels[i].name); + if (nls->levels) eFree(nls->levels); + eFree(nls); +} + +static void nestingLevelsPush (NestingLevels *nls, + const vString *name, int type) +{ + NestingLevel *nl = NULL; + + if (nls->n >= nls->allocated) + { + nls->allocated++; + nls->levels = xRealloc(nls->levels, + nls->allocated, NestingLevel); + nls->levels[nls->n].name = vStringNew(); + } + nl = &nls->levels[nls->n]; + nls->n++; + + 
vStringCopy(nl->name, name); + nl->type = type; +} + +#if 0 +static NestingLevel *nestingLevelsGetCurrent (NestingLevels *nls) +{ + Assert (nls != NULL); + + if (nls->n < 1) + return NULL; + + return &nls->levels[nls->n - 1]; +} + +static void nestingLevelsPop (NestingLevels *nls) +{ + const NestingLevel *nl = nestingLevelsGetCurrent(nls); + + Assert (nl != NULL); + vStringClear(nl->name); + nls->n--; +} +#endif + +static boolean isIdentifierFirstCharacter (int c) +{ + return (boolean) (isalpha (c) || c == '_'); +} + +static boolean isIdentifierCharacter (int c) +{ + return (boolean) (isalnum (c) || c == '_'); +} + +/* Given a string with the contents of a line directly after the "def" keyword, + * extract all relevant information and create a tag. + */ +static void makeFunctionTag (vString *const function, + vString *const parent, int is_class_parent, const char *arglist __unused) +{ + tagEntryInfo tag; + initTagEntry (&tag, vStringValue (function)); + + tag.kindName = "function"; + tag.kind = 'f'; + /* tag.extensionFields.arglist = arglist; */ + + if (vStringLength (parent) > 0) + { + if (is_class_parent) + { + tag.kindName = "member"; + tag.kind = 'm'; + tag.extensionFields.scope [0] = "class"; + tag.extensionFields.scope [1] = vStringValue (parent); + } + else + { + tag.extensionFields.scope [0] = "function"; + tag.extensionFields.scope [1] = vStringValue (parent); + } + } + + /* If a function starts with __, we mark it as file scope. + * FIXME: What is the proper way to signal such attributes? + * TODO: What does functions/classes starting with _ and __ mean in python? 
+ */ + if (strncmp (vStringValue (function), "__", 2) == 0 && + strcmp (vStringValue (function), "__init__") != 0) + { + tag.extensionFields.access = "private"; + tag.isFileScope = TRUE; + } + else + { + tag.extensionFields.access = "public"; + } + makeTagEntry (&tag); +} + +/* Given a string with the contents of the line directly after the "class" + * keyword, extract all necessary information and create a tag. + */ +static void makeClassTag (vString *const class, vString *const inheritance, + vString *const parent, int is_class_parent) +{ + tagEntryInfo tag; + initTagEntry (&tag, vStringValue (class)); + tag.kindName = "class"; + tag.kind = 'c'; + if (vStringLength (parent) > 0) + { + if (is_class_parent) + { + tag.extensionFields.scope [0] = "class"; + tag.extensionFields.scope [1] = vStringValue (parent); + } + else + { + tag.extensionFields.scope [0] = "function"; + tag.extensionFields.scope [1] = vStringValue (parent); + } + } + tag.extensionFields.inheritance = vStringValue (inheritance); + makeTagEntry (&tag); +} + +static void makeVariableTag (vString *const var, vString *const parent) +{ + tagEntryInfo tag; + initTagEntry (&tag, vStringValue (var)); + tag.kindName = "variable"; + tag.kind = 'v'; + if (vStringLength (parent) > 0) + { + tag.extensionFields.scope [0] = "class"; + tag.extensionFields.scope [1] = vStringValue (parent); + } + makeTagEntry (&tag); +} + +/* Skip a single or double quoted string. */ +static const char *skipString (const char *cp) +{ + const char *start = cp; + int escaped = 0; + for (cp++; *cp; cp++) + { + if (escaped) + escaped--; + else if (*cp == '\\') + escaped++; + else if (*cp == *start) + return cp + 1; + } + return cp; +} + +/* Skip everything up to an identifier start. 
*/ +static const char *skipEverything (const char *cp) +{ + for (; *cp; cp++) + { + if (*cp == '"' || *cp == '\'' || *cp == '#') + { + cp = skipString(cp); + if (!*cp) break; + } + if (isIdentifierFirstCharacter ((int) *cp)) + return cp; + } + return cp; +} + +/* Skip an identifier. */ +static const char *skipIdentifier (const char *cp) +{ + while (isIdentifierCharacter ((int) *cp)) + cp++; + return cp; +} + +static const char *findDefinitionOrClass (const char *cp) +{ + while (*cp) + { + cp = skipEverything (cp); + if (!strncmp(cp, "def", 3) || !strncmp(cp, "class", 5) || + !strncmp(cp, "cdef", 4) || !strncmp(cp, "cpdef", 5)) + { + return cp; + } + cp = skipIdentifier (cp); + } + return NULL; +} + +static const char *skipSpace (const char *cp) +{ + while (isspace ((int) *cp)) + ++cp; + return cp; +} + +/* Starting at ''cp'', parse an identifier into ''identifier''. */ +static const char *parseIdentifier (const char *cp, vString *const identifier) +{ + vStringClear (identifier); + while (isIdentifierCharacter ((int) *cp)) + { + vStringPut (identifier, (int) *cp); + ++cp; + } + vStringTerminate (identifier); + return cp; +} + +static void parseClass (const char *cp, vString *const class, + vString *const parent, int is_class_parent) +{ + vString *const inheritance = vStringNew (); + vStringClear (inheritance); + cp = parseIdentifier (cp, class); + cp = skipSpace (cp); + if (*cp == '(') + { + ++cp; + while (*cp != ')') + { + if (*cp == '\0') + { + /* Closing parenthesis can be in follow up line. 
*/ + cp = (const char *) fileReadLine (); + if (!cp) break; + vStringPut (inheritance, ' '); + continue; + } + vStringPut (inheritance, *cp); + ++cp; + } + vStringTerminate (inheritance); + } + makeClassTag (class, inheritance, parent, is_class_parent); + vStringDelete (inheritance); +} + +static void parseImports (const char *cp) +{ + const char *pos; + vString *name, *name_next; + + cp = skipEverything (cp); + + if ((pos = strstr (cp, "import")) == NULL) + return; + + cp = pos + 6; + + /* continue only if there is some space between the keyword and the identifier */ + if (! isspace (*cp)) + return; + + cp++; + cp = skipSpace (cp); + + name = vStringNew (); + name_next = vStringNew (); + + cp = skipEverything (cp); + while (*cp) + { + cp = parseIdentifier (cp, name); + + cp = skipEverything (cp); + /* we parse the next possible import statement as well to be able to ignore 'foo' in + * 'import foo as bar' */ + parseIdentifier (cp, name_next); + + /* take the current tag only if the next one is not "as" */ + if (strcmp (vStringValue (name_next), "as") != 0 && + strcmp (vStringValue (name), "as") != 0) + { + makeSimpleTag (name, PythonKinds, K_IMPORT); + } + } + vStringDelete (name); + vStringDelete (name_next); +} + +/* modified from get.c getArglistFromStr(). + * warning: terminates rest of string past arglist! + * note: does not ignore brackets inside strings! 
*/ +static char *parseArglist(const char *buf) +{ + char *start, *end; + int level; + if (NULL == buf) + return NULL; + if (NULL == (start = strchr(buf, '('))) + return NULL; + for (level = 1, end = start + 1; level > 0; ++end) + { + if ('\0' == *end) + break; + else if ('(' == *end) + ++ level; + else if (')' == *end) + -- level; + } + *end = '\0'; + return strdup(start); +} + +static void parseFunction (const char *cp, vString *const def, + vString *const parent, int is_class_parent) +{ + char *arglist; + + cp = parseIdentifier (cp, def); + arglist = parseArglist (cp); + makeFunctionTag (def, parent, is_class_parent, arglist); + if (arglist != NULL) { + eFree (arglist); + } +} + +/* Get the combined name of a nested symbol. Classes are separated with ".", + * functions with "/". For example this code: + * class MyClass: + * def myFunction: + * def SubFunction: + * class SubClass: + * def Method: + * pass + * Would produce this string: + * MyClass.MyFunction/SubFunction/SubClass.Method + */ +static boolean constructParentString(NestingLevels *nls, int indent, + vString *result) +{ + int i; + NestingLevel *prev = NULL; + int is_class = FALSE; + vStringClear (result); + for (i = 0; i < nls->n; i++) + { + NestingLevel *nl = nls->levels + i; + if (indent <= nl->indentation) + break; + if (prev) + { + vStringCatS(result, "."); /* make Geany symbol list grouping work properly */ +/* + if (prev->type == K_CLASS) + vStringCatS(result, "."); + else + vStringCatS(result, "/"); +*/ + } + vStringCat(result, nl->name); + is_class = (nl->type == K_CLASS); + prev = nl; + } + return is_class; +} + +/* Check whether parent's indentation level is higher than the current level and + * if so, remove it. + */ +static void checkParent(NestingLevels *nls, int indent, vString *parent) +{ + int i; + NestingLevel *n; + + for (i = 0; i < nls->n; i++) + { + n = nls->levels + i; + /* is there a better way to compare two vStrings? 
*/ + if (strcmp(vStringValue(parent), vStringValue(n->name)) == 0) + { + if (n && indent <= n->indentation) + { + /* remove this level by clearing its name */ + vStringClear(n->name); + } + break; + } + } +} + +static void addNestingLevel(NestingLevels *nls, int indentation, + const vString *name, boolean is_class) +{ + int i; + NestingLevel *nl = NULL; + + for (i = 0; i < nls->n; i++) + { + nl = nls->levels + i; + if (indentation <= nl->indentation) break; + } + if (i == nls->n) + { + nestingLevelsPush(nls, name, 0); + nl = nls->levels + i; + } + else + { /* reuse existing slot */ + nls->n = i + 1; + vStringCopy(nl->name, name); + } + nl->indentation = indentation; + nl->type = is_class ? K_CLASS : !K_CLASS; +} + +/* Return a pointer to the start of the next triple string, or NULL. Store + * the kind of triple string in "which" if the return is not NULL. + */ +static char const *find_triple_start(char const *string, char const **which) +{ + char const *cp = string; + + for (; *cp; cp++) + { + if (*cp == '"' || *cp == '\'') + { + if (strncmp(cp, doubletriple, 3) == 0) + { + *which = doubletriple; + return cp; + } + if (strncmp(cp, singletriple, 3) == 0) + { + *which = singletriple; + return cp; + } + cp = skipString(cp); + if (!*cp) break; + } + } + return NULL; +} + +/* Find the end of a triple string as pointed to by "which", and update "which" + * with any other triple strings following in the given string. + */ +static void find_triple_end(char const *string, char const **which) +{ + char const *s = string; + while (1) + { + /* Check if the string ends in the same line. */ + s = strstr (s, *which); + if (!s) break; + s += 3; + *which = NULL; + /* If yes, check if another one starts in the same line. */ + s = find_triple_start(s, which); + if (!s) break; + s += 3; + } +} + +static const char *findVariable(const char *line) +{ + /* Parse global and class variable names (C.x) from assignment statements. + * Object attributes (obj.x) are ignored. 
+ * Assignment to a tuple 'x, y = 2, 3' not supported. + * TODO: ignore duplicate tags from reassignment statements. */ + const char *cp, *sp, *eq, *start; + + cp = strstr(line, "="); + if (!cp) + return NULL; + eq = cp + 1; + while (*eq) + { + if (*eq == '=') + return NULL; /* ignore '==' operator and 'x=5,y=6)' function lines */ + if (*eq == '(' || *eq == '#') + break; /* allow 'x = func(b=2,y=2,' lines and comments at the end of line */ + eq++; + } + + /* go backwards to the start of the line, checking we have valid chars */ + start = cp - 1; + while (start >= line && isspace ((int) *start)) + --start; + while (start >= line && isIdentifierCharacter ((int) *start)) + --start; + if (!isIdentifierFirstCharacter(*(start + 1))) + return NULL; + sp = start; + while (sp >= line && isspace ((int) *sp)) + --sp; + if ((sp + 1) != line) /* the line isn't a simple variable assignment */ + return NULL; + /* the line is valid, parse the variable name */ + ++start; + return start; +} + +/* Skip type declaration that optionally follows a cdef/cpdef */ +static const char *skipTypeDecl (const char *cp, boolean *is_class) +{ + const char *lastStart = cp, *ptr = cp; + int loopCount = 0; + ptr = skipSpace(cp); + if (!strncmp("extern", ptr, 6)) { + ptr += 6; + ptr = skipSpace(ptr); + if (!strncmp("from", ptr, 4)) { return NULL; } + } + if (!strncmp("class", ptr, 5)) { + ptr += 5 ; + *is_class = TRUE; + ptr = skipSpace(ptr); + return ptr; + } + /* limit so that we don't pick off "int item=obj()" */ + while (*ptr && loopCount++ < 2) { + while (*ptr && *ptr != '=' && *ptr != '(' && !isspace(*ptr)) ptr++; + if (!*ptr || *ptr == '=') return NULL; + if (*ptr == '(') { + return lastStart; /* if we stopped on a '(' we are done */ + } + ptr = skipSpace(ptr); + lastStart = ptr; + while (*lastStart == '*') lastStart++; /* cdef int *identifier */ + } + return NULL; +} + +static void findPythonTags (void) +{ + vString *const continuation = vStringNew (); + vString *const name = vStringNew (); + 
vString *const parent = vStringNew(); + + NestingLevels *const nesting_levels = nestingLevelsNew(); + + const char *line; + int line_skip = 0; + char const *longStringLiteral = NULL; + + while ((line = (const char *) fileReadLine ()) != NULL) + { + const char *cp = line, *candidate; + char const *longstring; + char const *keyword, *variable; + int indent; + + cp = skipSpace (cp); + + if (*cp == '\0') /* skip blank line */ + continue; + + /* Skip comment if we are not inside a multi-line string. */ + if (*cp == '#' && !longStringLiteral) + continue; + + /* Deal with line continuation. */ + if (!line_skip) vStringClear(continuation); + vStringCatS(continuation, line); + vStringStripTrailing(continuation); + if (vStringLast(continuation) == '\\') + { + vStringChop(continuation); + vStringCatS(continuation, " "); + line_skip = 1; + continue; + } + cp = line = vStringValue(continuation); + cp = skipSpace (cp); + indent = cp - line; + line_skip = 0; + + checkParent(nesting_levels, indent, parent); + + /* Deal with multiline string ending. */ + if (longStringLiteral) + { + find_triple_end(cp, &longStringLiteral); + continue; + } + + /* Deal with multiline string start. */ + longstring = find_triple_start(cp, &longStringLiteral); + if (longstring) + { + longstring += 3; + find_triple_end(longstring, &longStringLiteral); + /* We don't parse for any tags in the rest of the line. */ + continue; + } + + /* Deal with def and class keywords. 
*/ + keyword = findDefinitionOrClass (cp); + if (keyword) + { + boolean found = FALSE; + boolean is_class = FALSE; + if (!strncmp (keyword, "def ", 4)) + { + cp = skipSpace (keyword + 3); + found = TRUE; + } + else if (!strncmp (keyword, "class ", 6)) + { + cp = skipSpace (keyword + 5); + found = TRUE; + is_class = TRUE; + } + else if (!strncmp (keyword, "cdef ", 5)) + { + cp = skipSpace(keyword + 4); + candidate = skipTypeDecl (cp, &is_class); + if (candidate) + { + found = TRUE; + cp = candidate; + } + + } + else if (!strncmp (keyword, "cpdef ", 6)) + { + cp = skipSpace(keyword + 5); + candidate = skipTypeDecl (cp, &is_class); + if (candidate) + { + found = TRUE; + cp = candidate; + } + } + + if (found) + { + boolean is_parent_class; + + is_parent_class = + constructParentString(nesting_levels, indent, parent); + + if (is_class) + parseClass (cp, name, parent, is_parent_class); + else + parseFunction(cp, name, parent, is_parent_class); + + addNestingLevel(nesting_levels, indent, name, is_class); + } + } + /* Find global and class variables */ + variable = findVariable(line); + if (variable) + { + const char *start = variable; + boolean parent_is_class; + + vStringClear (name); + while (isIdentifierCharacter ((int) *start)) + { + vStringPut (name, (int) *start); + ++start; + } + vStringTerminate (name); + + parent_is_class = constructParentString(nesting_levels, indent, parent); + /* skip variables in methods */ + if (! parent_is_class && vStringLength(parent) > 0) + continue; + + makeVariableTag (name, parent); + } + /* Find and parse imports */ + parseImports(line); + } + /* Clean up all memory we allocated. 
*/ + vStringDelete (parent); + vStringDelete (name); + vStringDelete (continuation); + nestingLevelsFree (nesting_levels); +} + +extern parserDefinition *PythonParser (void) +{ + static const char *const extensions[] = { "py", "pyx", "pxd", "pxi" ,"scons", NULL }; + parserDefinition *def = parserNew ("Python"); + def->kinds = PythonKinds; + def->kindCount = KIND_COUNT (PythonKinds); + def->extensions = extensions; + def->parser = findPythonTags; + return def; +} + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/third_party/ctags/read.c b/third_party/ctags/read.c new file mode 100644 index 000000000..2d1c5bd95 --- /dev/null +++ b/third_party/ctags/read.c @@ -0,0 +1,565 @@ +// clang-format off +/* +* $Id: read.c 769 2010-09-11 21:00:16Z dhiebert $ +* +* Copyright (c) 1996-2002, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains low level source and tag file read functions (newline +* conversion for source files are performed at this level). 
+*/ + +/* +* INCLUDE FILES +*/ +#include "third_party/ctags/general.h" /* must always come first */ + +#include "libc/mem/alg.h" +#include "libc/str/str.h" +#include "libc/str/str.h" + +#define FILE_WRITE +#include "third_party/ctags/read.h" +#include "third_party/ctags/debug.h" +#include "third_party/ctags/entry.h" +#include "third_party/ctags/main.h" +#include "third_party/ctags/routines.h" +#include "third_party/ctags/options.h" + +/* +* DATA DEFINITIONS +*/ +inputFile File; /* globally read through macros */ +static fpos_t StartOfLine; /* holds deferred position of start of line */ + +/* +* FUNCTION DEFINITIONS +*/ + +extern void freeSourceFileResources (void) +{ + if (File.name != NULL) + vStringDelete (File.name); + if (File.path != NULL) + vStringDelete (File.path); + if (File.source.name != NULL) + vStringDelete (File.source.name); + if (File.source.tagPath != NULL) + eFree (File.source.tagPath); + if (File.line != NULL) + vStringDelete (File.line); +} + +/* + * Source file access functions + */ + +static void setInputFileName (const char *const fileName) +{ + const char *const head = fileName; + const char *const tail = baseFilename (head); + + if (File.name != NULL) + vStringDelete (File.name); + File.name = vStringNewInit (fileName); + + if (File.path != NULL) + vStringDelete (File.path); + if (tail == head) + File.path = NULL; + else + { + const size_t length = tail - head - 1; + File.path = vStringNew (); + vStringNCopyS (File.path, fileName, length); + } +} + +static void setSourceFileParameters (vString *const fileName) +{ + if (File.source.name != NULL) + vStringDelete (File.source.name); + File.source.name = fileName; + + if (File.source.tagPath != NULL) + eFree (File.source.tagPath); + if (! 
Option.tagRelative || isAbsolutePath (vStringValue (fileName))) + File.source.tagPath = eStrdup (vStringValue (fileName)); + else + File.source.tagPath = + relativeFilename (vStringValue (fileName), TagFile.directory); + + if (vStringLength (fileName) > TagFile.max.file) + TagFile.max.file = vStringLength (fileName); + + File.source.isHeader = isIncludeFile (vStringValue (fileName)); + File.source.language = getFileLanguage (vStringValue (fileName)); +} + +static boolean setSourceFileName (vString *const fileName) +{ + boolean result = FALSE; + if (getFileLanguage (vStringValue (fileName)) != LANG_IGNORE) + { + vString *pathName; + if (isAbsolutePath (vStringValue (fileName)) || File.path == NULL) + pathName = vStringNewCopy (fileName); + else + pathName = combinePathAndFile ( + vStringValue (File.path), vStringValue (fileName)); + setSourceFileParameters (pathName); + result = TRUE; + } + return result; +} + +/* + * Line directive parsing + */ + +static int skipWhite (void) +{ + int c; + do + c = getc (File.fp); + while (c == ' ' || c == '\t'); + return c; +} + +static unsigned long readLineNumber (void) +{ + unsigned long lNum = 0; + int c = skipWhite (); + while (c != EOF && isdigit (c)) + { + lNum = (lNum * 10) + (c - '0'); + c = getc (File.fp); + } + ungetc (c, File.fp); + if (c != ' ' && c != '\t') + lNum = 0; + + return lNum; +} + +/* While ANSI only permits lines of the form: + * # line n "filename" + * Earlier compilers generated lines of the form + * # n filename + * GNU C will output lines of the form: + * # n "filename" + * So we need to be fairly flexible in what we accept. + */ +static vString *readFileName (void) +{ + vString *const fileName = vStringNew (); + boolean quoteDelimited = FALSE; + int c = skipWhite (); + + if (c == '"') + { + c = getc (File.fp); /* skip double-quote */ + quoteDelimited = TRUE; + } + while (c != EOF && c != '\n' && + (quoteDelimited ? 
(c != '"') : (c != ' ' && c != '\t'))) + { + vStringPut (fileName, c); + c = getc (File.fp); + } + if (c == '\n') + ungetc (c, File.fp); + vStringPut (fileName, '\0'); + + return fileName; +} + +static boolean parseLineDirective (void) +{ + boolean result = FALSE; + int c = skipWhite (); + DebugStatement ( const char* lineStr = ""; ) + + if (isdigit (c)) + { + ungetc (c, File.fp); + result = TRUE; + } + else if (c == 'l' && getc (File.fp) == 'i' && + getc (File.fp) == 'n' && getc (File.fp) == 'e') + { + c = getc (File.fp); + if (c == ' ' || c == '\t') + { + DebugStatement ( lineStr = "line"; ) + result = TRUE; + } + } + if (result) + { + const unsigned long lNum = readLineNumber (); + if (lNum == 0) + result = FALSE; + else + { + vString *const fileName = readFileName (); + if (vStringLength (fileName) == 0) + { + File.source.lineNumber = lNum - 1; /* applies to NEXT line */ + DebugStatement ( debugPrintf (DEBUG_RAW, "#%s %ld", lineStr, lNum); ) + } + else if (setSourceFileName (fileName)) + { + File.source.lineNumber = lNum - 1; /* applies to NEXT line */ + DebugStatement ( debugPrintf (DEBUG_RAW, "#%s %ld \"%s\"", + lineStr, lNum, vStringValue (fileName)); ) + } + + if (Option.include.fileNames && vStringLength (fileName) > 0 && + lNum == 1) + { + tagEntryInfo tag; + initTagEntry (&tag, baseFilename (vStringValue (fileName))); + + tag.isFileEntry = TRUE; + tag.lineNumberEntry = TRUE; + tag.lineNumber = 1; + tag.kindName = "file"; + tag.kind = 'F'; + + makeTagEntry (&tag); + } + vStringDelete (fileName); + result = TRUE; + } + } + return result; +} + +/* + * Source file I/O operations + */ + +/* This function opens a source file, and resets the line counter. If it + * fails, it will display an error message and leave the File.fp set to NULL. 
+ */ +extern boolean fileOpen (const char *const fileName, const langType language) +{ +#ifdef VMS + const char *const openMode = "r"; +#else + const char *const openMode = "rb"; +#endif + boolean opened = FALSE; + + /* If another file was already open, then close it. + */ + if (File.fp != NULL) + { + fclose (File.fp); /* close any open source file */ + File.fp = NULL; + } + + File.fp = fopen (fileName, openMode); + if (File.fp == NULL) + error (WARNING | PERROR, "cannot open \"%s\"", fileName); + else + { + opened = TRUE; + + setInputFileName (fileName); + fgetpos (File.fp, &StartOfLine); + fgetpos (File.fp, &File.filePosition); + File.currentLine = NULL; + File.lineNumber = 0L; + File.eof = FALSE; + File.newLine = TRUE; + + if (File.line != NULL) + vStringClear (File.line); + + setSourceFileParameters (vStringNewInit (fileName)); + File.source.lineNumber = 0L; + + verbose ("OPENING %s as %s language %sfile\n", fileName, + getLanguageName (language), + File.source.isHeader ? "include " : ""); + } + return opened; +} + +extern void fileClose (void) +{ + if (File.fp != NULL) + { + /* The line count of the file is 1 too big, since it is one-based + * and is incremented upon each newline. + */ + if (Option.printTotals) + { + fileStatus *status = eStat (vStringValue (File.name)); + addTotals (0, File.lineNumber - 1L, status->size); + } + fclose (File.fp); + File.fp = NULL; + } +} + +extern boolean fileEOF (void) +{ + return File.eof; +} + +/* Action to take for each encountered source newline. + */ +static void fileNewline (void) +{ + File.filePosition = StartOfLine; + File.newLine = FALSE; + File.lineNumber++; + File.source.lineNumber++; + DebugStatement ( if (Option.breakLine == File.lineNumber) lineBreak (); ) + DebugStatement ( debugPrintf (DEBUG_RAW, "%6ld: ", File.lineNumber); ) +} + +/* This function reads a single character from the stream, performing newline + * canonicalization. 
+ */ +static int iFileGetc (void) +{ + int c; +readnext: + c = getc (File.fp); + + /* If previous character was a newline, then we're starting a line. + */ + if (File.newLine && c != EOF) + { + fileNewline (); + if (c == '#' && Option.lineDirectives) + { + if (parseLineDirective ()) + goto readnext; + else + { + fsetpos (File.fp, &StartOfLine); + c = getc (File.fp); + } + } + } + + if (c == EOF) + File.eof = TRUE; + else if (c == NEWLINE) + { + File.newLine = TRUE; + fgetpos (File.fp, &StartOfLine); + } + else if (c == CRETURN) + { + /* Turn line breaks into a canonical form. The three commonly + * used forms if line breaks: LF (UNIX/Mac OS X), CR (Mac OS 9), + * and CR-LF (MS-DOS) are converted into a generic newline. + */ +#ifndef macintosh + const int next = getc (File.fp); /* is CR followed by LF? */ + if (next != NEWLINE) + ungetc (next, File.fp); + else +#endif + { + c = NEWLINE; /* convert CR into newline */ + File.newLine = TRUE; + fgetpos (File.fp, &StartOfLine); + } + } + DebugStatement ( debugPutc (DEBUG_RAW, c); ) + return c; +} + +extern void fileUngetc (int c) +{ + File.ungetch = c; +} + +static vString *iFileGetLine (void) +{ + vString *result = NULL; + int c; + if (File.line == NULL) + File.line = vStringNew (); + vStringClear (File.line); + do + { + c = iFileGetc (); + if (c != EOF) + vStringPut (File.line, c); + if (c == '\n' || (c == EOF && vStringLength (File.line) > 0)) + { + vStringTerminate (File.line); +#ifdef HAVE_REGEX + if (vStringLength (File.line) > 0) + matchRegex (File.line, File.source.language); +#endif + result = File.line; + break; + } + } while (c != EOF); + Assert (result != NULL || File.eof); + return result; +} + +/* Do not mix use of fileReadLine () and fileGetc () for the same file. + */ +extern int fileGetc (void) +{ + int c; + + /* If there is an ungotten character, then return it. Don't do any + * other processing on it, though, because we already did that the + * first time it was read through fileGetc (). 
+ */ + if (File.ungetch != '\0') + { + c = File.ungetch; + File.ungetch = '\0'; + return c; /* return here to avoid re-calling debugPutc () */ + } + do + { + if (File.currentLine != NULL) + { + c = *File.currentLine++; + if (c == '\0') + File.currentLine = NULL; + } + else + { + vString* const line = iFileGetLine (); + if (line != NULL) + File.currentLine = (unsigned char*) vStringValue (line); + if (File.currentLine == NULL) + c = EOF; + else + c = '\0'; + } + } while (c == '\0'); + DebugStatement ( debugPutc (DEBUG_READ, c); ) + return c; +} + +extern int fileSkipToCharacter (int c) +{ + int d; + do + { + d = fileGetc (); + } while (d != EOF && d != c); + return d; +} + +/* An alternative interface to fileGetc (). Do not mix use of fileReadLine() + * and fileGetc() for the same file. The returned string does not contain + * the terminating newline. A NULL return value means that all lines in the + * file have been read and we are at the end of file. + */ +extern const unsigned char *fileReadLine (void) +{ + vString* const line = iFileGetLine (); + const unsigned char* result = NULL; + if (line != NULL) + { + result = (const unsigned char*) vStringValue (line); + vStringStripNewline (line); + DebugStatement ( debugPrintf (DEBUG_READ, "%s\n", result); ) + } + return result; +} + +/* + * Source file line reading with automatic buffer sizing + */ +extern char *readLine (vString *const vLine, FILE *const fp) +{ + char *result = NULL; + + vStringClear (vLine); + if (fp == NULL) /* to free memory allocated to buffer */ + error (FATAL, "NULL file pointer"); + else + { + boolean reReadLine; + + /* If reading the line places any character other than a null or a + * newline at the last character position in the buffer (one less + * than the buffer size), then we must resize the buffer and + * reattempt to read the line. 
+ */ + do + { + char *const pLastChar = vStringValue (vLine) + vStringSize (vLine) -2; + fpos_t startOfLine; + + fgetpos (fp, &startOfLine); + reReadLine = FALSE; + *pLastChar = '\0'; + result = fgets (vStringValue (vLine), (int) vStringSize (vLine), fp); + if (result == NULL) + { + if (! feof (fp)) + error (FATAL | PERROR, "Failure on attempt to read file"); + } + else if (*pLastChar != '\0' && + *pLastChar != '\n' && *pLastChar != '\r') + { + /* buffer overflow */ + reReadLine = vStringAutoResize (vLine); + if (reReadLine) + fsetpos (fp, &startOfLine); + else + error (FATAL | PERROR, "input line too big; out of memory"); + } + else + { + char* eol; + vStringSetLength (vLine); + /* canonicalize new line */ + eol = vStringValue (vLine) + vStringLength (vLine) - 1; + if (*eol == '\r') + *eol = '\n'; + else if (*(eol - 1) == '\r' && *eol == '\n') + { + *(eol - 1) = '\n'; + *eol = '\0'; + --vLine->length; + } + } + } while (reReadLine); + } + return result; +} + +/* Places into the line buffer the contents of the line referenced by + * "location". + */ +extern char *readSourceLine ( + vString *const vLine, fpos_t location, long *const pSeekValue) +{ + fpos_t orignalPosition; + char *result; + + fgetpos (File.fp, &orignalPosition); + fsetpos (File.fp, &location); + if (pSeekValue != NULL) + *pSeekValue = ftell (File.fp); + result = readLine (vLine, File.fp); + if (result == NULL) + error (FATAL, "Unexpected end of file: %s", vStringValue (File.name)); + fsetpos (File.fp, &orignalPosition); + + return result; +} + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/third_party/ctags/read.h b/third_party/ctags/read.h new file mode 100644 index 000000000..b8a2fc66c --- /dev/null +++ b/third_party/ctags/read.h @@ -0,0 +1,123 @@ +// clang-format off +/* +* $Id: read.h 769 2010-09-11 21:00:16Z dhiebert $ +* +* Copyright (c) 1998-2002, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. 
+* +* External interface to read.c +*/ +#ifndef _READ_H +#define _READ_H + +#if defined(FILE_WRITE) || defined(VAXC) +# define CONST_FILE +#else +# define CONST_FILE const +#endif + +/* +* INCLUDE FILES +*/ +#include "third_party/ctags/general.h" /* must always come first */ + +#include "libc/calls/calls.h" +#include "libc/calls/dprintf.h" +#include "libc/calls/weirdtypes.h" +#include "libc/fmt/fmt.h" +#include "libc/mem/fmt.h" +#include "libc/stdio/stdio.h" +#include "libc/stdio/temp.h" +#include "third_party/musl/tempnam.h" +#include "libc/str/str.h" + +#include "third_party/ctags/parse.h" +#include "third_party/ctags/vstring.h" + +/* +* MACROS +*/ +#define getInputLineNumber() File.lineNumber +#define getInputFileName() vStringValue (File.source.name) +#define getInputFilePosition() File.filePosition +#define getSourceFileName() vStringValue (File.source.name) +#define getSourceFileTagPath() File.source.tagPath +#define getSourceLanguage() File.source.language +#define getSourceLanguageName() getLanguageName (File.source.language) +#define getSourceLineNumber() File.source.lineNumber +#define isLanguage(lang) (boolean)((lang) == File.source.language) +#define isHeaderFile() File.source.isHeader + +/* +* DATA DECLARATIONS +*/ + +enum eCharacters { + /* white space characters */ + SPACE = ' ', + NEWLINE = '\n', + CRETURN = '\r', + FORMFEED = '\f', + TAB = '\t', + VTAB = '\v', + + /* some hard to read characters */ + DOUBLE_QUOTE = '"', + SINGLE_QUOTE = '\'', + BACKSLASH = '\\', + + STRING_SYMBOL = ('S' + 0x80), + CHAR_SYMBOL = ('C' + 0x80) +}; + +/* Maintains the state of the current source file. 
+ */ +typedef struct sInputFile { + vString *name; /* name of input file */ + vString *path; /* path of input file (if any) */ + vString *line; /* last line read from file */ + const unsigned char* currentLine; /* current line being worked on */ + FILE *fp; /* stream used for reading the file */ + unsigned long lineNumber; /* line number in the input file */ + fpos_t filePosition; /* file position of current line */ + int ungetch; /* a single character that was ungotten */ + boolean eof; /* have we reached the end of file? */ + boolean newLine; /* will the next character begin a new line? */ + + /* Contains data pertaining to the original source file in which the tag + * was defined. This may be different from the input file when #line + * directives are processed (i.e. the input file is preprocessor output). + */ + struct sSource { + vString *name; /* name to report for source file */ + char *tagPath; /* path of source file relative to tag file */ + unsigned long lineNumber;/* line number in the source file */ + boolean isHeader; /* is source file a header file? 
*/ + langType language; /* language of source file */ + } source; +} inputFile; + +/* +* GLOBAL VARIABLES +*/ +extern CONST_FILE inputFile File; + +/* +* FUNCTION PROTOTYPES +*/ +extern void freeSourceFileResources (void); +extern boolean fileOpen (const char *const fileName, const langType language); +extern boolean fileEOF (void); +extern void fileClose (void); +extern int fileGetc (void); +extern int fileSkipToCharacter (int c); +extern void fileUngetc (int c); +extern const unsigned char *fileReadLine (void); +extern char *readLine (vString *const vLine, FILE *const fp); +extern char *readSourceLine (vString *const vLine, fpos_t location, long *const pSeekValue); + +#endif /* _READ_H */ + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/third_party/ctags/readtags.c b/third_party/ctags/readtags.c new file mode 100644 index 000000000..c3d6037ce --- /dev/null +++ b/third_party/ctags/readtags.c @@ -0,0 +1,956 @@ +/* + * $Id: readtags.c 592 2007-07-31 03:30:41Z dhiebert $ + * + * Copyright (c) 1996-2003, Darren Hiebert + * + * This source code is released into the public domain. + * + * This module contains functions for reading tag files. + */ +#include "libc/calls/calls.h" +#include "libc/calls/weirdtypes.h" +#include "libc/errno.h" +#include "libc/fmt/conv.h" +#include "libc/mem/mem.h" +#include "libc/stdio/stdio.h" +#include "libc/str/str.h" +#include "third_party/ctags/readtags.h" +// clang-format off + +/* +* MACROS +*/ +#define TAB '\t' + + +/* +* DATA DECLARATIONS +*/ +typedef struct { + size_t size; + char *buffer; +} vstring; + +/* Information about current tag file */ +struct sTagFile { + /* has the file been opened and this structure initialized? */ + short initialized; + /* format of tag file */ + short format; + /* how is the tag file sorted? 
*/ + sortType sortMethod; + /* pointer to file structure */ + FILE* fp; + /* file position of first character of `line' */ + off_t pos; + /* size of tag file in seekable positions */ + off_t size; + /* last line read */ + vstring line; + /* name of tag in last line read */ + vstring name; + /* defines tag search state */ + struct { + /* file position of last match for tag */ + off_t pos; + /* name of tag last searched for */ + char *name; + /* length of name for partial matches */ + size_t nameLength; + /* peforming partial match */ + short partial; + /* ignoring case */ + short ignorecase; + } search; + /* miscellaneous extension fields */ + struct { + /* number of entries in `list' */ + unsigned short max; + /* list of key value pairs */ + tagExtensionField *list; + } fields; + /* buffers to be freed at close */ + struct { + /* name of program author */ + char *author; + /* name of program */ + char *name; + /* URL of distribution */ + char *url; + /* program version */ + char *version; + } program; +}; + +/* +* DATA DEFINITIONS +*/ +const char *const EmptyString = ""; +const char *const PseudoTagPrefix = "!_"; + +/* +* FUNCTION DEFINITIONS +*/ + +/* + * Compare two strings, ignoring case. + * Return 0 for match, < 0 for smaller, > 0 for bigger + * Make sure case is folded to uppercase in comparison (like for 'sort -f') + * This makes a difference when one of the chars lies between upper and lower + * ie. one of the chars [ \ ] ^ _ ` for ascii. (The '_' in particular !) 
+ */ +static int struppercmp (const char *s1, const char *s2) +{ + int result; + do + { + result = toupper ((int) *s1) - toupper ((int) *s2); + } while (result == 0 && *s1++ != '\0' && *s2++ != '\0'); + return result; +} + +static int strnuppercmp (const char *s1, const char *s2, size_t n) +{ + int result; + do + { + result = toupper ((int) *s1) - toupper ((int) *s2); + } while (result == 0 && --n > 0 && *s1++ != '\0' && *s2++ != '\0'); + return result; +} + +static int growString (vstring *s) +{ + int result = 0; + size_t newLength; + char *newLine; + if (s->size == 0) + { + newLength = 128; + newLine = (char*) malloc (newLength); + *newLine = '\0'; + } + else + { + newLength = 2 * s->size; + newLine = (char*) realloc (s->buffer, newLength); + } + if (newLine == NULL) + perror ("string too large"); + else + { + s->buffer = newLine; + s->size = newLength; + result = 1; + } + return result; +} + +/* Copy name of tag out of tag line */ +static void copyName (tagFile *const file) +{ + size_t length; + const char *end = strchr (file->line.buffer, '\t'); + if (end == NULL) + { + end = strchr (file->line.buffer, '\n'); + if (end == NULL) + end = strchr (file->line.buffer, '\r'); + } + if (end != NULL) + length = end - file->line.buffer; + else + length = strlen (file->line.buffer); + while (length >= file->name.size) + growString (&file->name); + strncpy (file->name.buffer, file->line.buffer, length); + file->name.buffer [length] = '\0'; +} + +static int readTagLineRaw (tagFile *const file) +{ + int result = 1; + int reReadLine; + + /* If reading the line places any character other than a null or a + * newline at the last character position in the buffer (one less than + * the buffer size), then we must resize the buffer and reattempt to read + * the line. 
+ */ + do + { + char *const pLastChar = file->line.buffer + file->line.size - 2; + char *line; + + file->pos = ftell (file->fp); + reReadLine = 0; + *pLastChar = '\0'; + line = fgets (file->line.buffer, (int) file->line.size, file->fp); + if (line == NULL) + { + /* read error */ + if (! feof (file->fp)) + perror ("readTagLine"); + result = 0; + } + else if (*pLastChar != '\0' && + *pLastChar != '\n' && *pLastChar != '\r') + { + /* buffer overflow */ + growString (&file->line); + fseek (file->fp, file->pos, SEEK_SET); + reReadLine = 1; + } + else + { + size_t i = strlen (file->line.buffer); + while (i > 0 && + (file->line.buffer [i - 1] == '\n' || file->line.buffer [i - 1] == '\r')) + { + file->line.buffer [i - 1] = '\0'; + --i; + } + } + } while (reReadLine && result); + if (result) + copyName (file); + return result; +} + +static int readTagLine (tagFile *const file) +{ + int result; + do + { + result = readTagLineRaw (file); + } while (result && *file->name.buffer == '\0'); + return result; +} + +static tagResult growFields (tagFile *const file) +{ + tagResult result = TagFailure; + unsigned short newCount = (unsigned short) 2 * file->fields.max; + tagExtensionField *newFields = (tagExtensionField*) + realloc (file->fields.list, newCount * sizeof (tagExtensionField)); + if (newFields == NULL) + perror ("too many extension fields"); + else + { + file->fields.list = newFields; + file->fields.max = newCount; + result = TagSuccess; + } + return result; +} + +static void parseExtensionFields (tagFile *const file, tagEntry *const entry, + char *const string) +{ + char *p = string; + while (p != NULL && *p != '\0') + { + while (*p == TAB) + *p++ = '\0'; + if (*p != '\0') + { + char *colon; + char *field = p; + p = strchr (p, TAB); + if (p != NULL) + *p++ = '\0'; + colon = strchr (field, ':'); + if (colon == NULL) + entry->kind = field; + else + { + const char *key = field; + const char *value = colon + 1; + *colon = '\0'; + if (strcmp (key, "kind") == 0) + entry->kind = 
value; + else if (strcmp (key, "file") == 0) + entry->fileScope = 1; + else if (strcmp (key, "line") == 0) + entry->address.lineNumber = atol (value); + /* Any other field is recorded only while the table has room or can + * be enlarged; otherwise it is dropped rather than written past + * the end of the table. + */ + else if (entry->fields.count < file->fields.max || + growFields (file) == TagSuccess) + { + file->fields.list [entry->fields.count].key = key; + file->fields.list [entry->fields.count].value = value; + ++entry->fields.count; + } + } + } + } +} + +/* Splits the line held in file->line.buffer, in place, into the tag name, + * the file name and the address (search pattern or line number), and passes + * whatever follows the `;"' marker to parseExtensionFields(). The pointers + * stored in `entry' refer to the line buffer itself. + */ +static void parseTagLine (tagFile *file, tagEntry *const entry) +{ + int i; + char *p = file->line.buffer; + char *tab = strchr (p, TAB); + + entry->fields.list = NULL; + entry->fields.count = 0; + entry->kind = NULL; + entry->fileScope = 0; + + entry->name = p; + if (tab != NULL) + { + *tab = '\0'; + p = tab + 1; + entry->file = p; + tab = strchr (p, TAB); + if (tab != NULL) + { + int fieldsPresent; + *tab = '\0'; + p = tab + 1; + if (*p == '/' || *p == '?') + { + /* parse pattern */ + int delimiter = *(unsigned char*) p; + entry->address.lineNumber = 0; + entry->address.pattern = p; + do + { + p = strchr (p + 1, delimiter); + } while (p != NULL && *(p - 1) == '\\'); + if (p == NULL) + { + /* invalid pattern */ + } + else + ++p; + } + else if (isdigit ((int) *(unsigned char*) p)) + { + /* parse line number */ + entry->address.pattern = p; + entry->address.lineNumber = atol (p); + while (isdigit ((int) *(unsigned char*) p)) + ++p; + } + else + { + /* invalid pattern */ + } + /* p is NULL when a pattern has no closing delimiter; there is + * then nothing left on the line to terminate or to parse. + */ + if (p != NULL) + { + fieldsPresent = (strncmp (p, ";\"", 2) == 0); + *p = '\0'; + if (fieldsPresent) + parseExtensionFields (file, entry, p + 2); + } + } + } + if (entry->fields.count > 0) + entry->fields.list = file->fields.list; + for (i = entry->fields.count ; i < file->fields.max ; ++i) + { + file->fields.list [i].key = NULL; + file->fields.list [i].value = NULL; + } +} + +/* Returns a heap copy of `str', or NULL when `str' is NULL or the copy + * cannot be allocated (the latter is reported with perror()). + */ +static char *duplicate (const char *str) +{ + char *result = NULL; + if (str != NULL) + { + result = strdup (str); + if (result == NULL) + perror (NULL); + } + return result; +} + +static void readPseudoTags (tagFile *const file, tagFileInfo *const info) +{ + fpos_t
startOfLine; + const size_t prefixLength = strlen (PseudoTagPrefix); + if (info != NULL) + { + info->file.format = 1; + info->file.sort = TAG_UNSORTED; + info->program.author = NULL; + info->program.name = NULL; + info->program.url = NULL; + info->program.version = NULL; + } + while (1) + { + fgetpos (file->fp, &startOfLine); + if (! readTagLine (file)) + break; + if (strncmp (file->line.buffer, PseudoTagPrefix, prefixLength) != 0) + break; + else + { + tagEntry entry; + const char *key, *value; + /* parseTagLine() assigns entry.file only when the line has a + * second column, so preset it to recognize a pseudo-tag that + * carries no value. + */ + entry.file = NULL; + parseTagLine (file, &entry); + key = entry.name + prefixLength; + value = entry.file; + if (value == NULL) + continue; /* malformed pseudo-tag without a value: skip it */ + if (strcmp (key, "TAG_FILE_SORTED") == 0) + file->sortMethod = (sortType) atoi (value); + else if (strcmp (key, "TAG_FILE_FORMAT") == 0) + file->format = (short) atoi (value); + else if (strcmp (key, "TAG_PROGRAM_AUTHOR") == 0) + file->program.author = duplicate (value); + else if (strcmp (key, "TAG_PROGRAM_NAME") == 0) + file->program.name = duplicate (value); + else if (strcmp (key, "TAG_PROGRAM_URL") == 0) + file->program.url = duplicate (value); + else if (strcmp (key, "TAG_PROGRAM_VERSION") == 0) + file->program.version = duplicate (value); + if (info != NULL) + { + info->file.format = file->format; + info->file.sort = file->sortMethod; + info->program.author = file->program.author; + info->program.name = file->program.name; + info->program.url = file->program.url; + info->program.version = file->program.version; + } + } + } + /* leave the stream at the start of the first line that is not a pseudo-tag */ + fsetpos (file->fp, &startOfLine); +} + +static void gotoFirstLogicalTag (tagFile *const file) +{ + fpos_t startOfLine; + const size_t prefixLength = strlen (PseudoTagPrefix); + rewind (file->fp); + while (1) + { + fgetpos (file->fp, &startOfLine); + if (!
readTagLine (file)) + break; + if (strncmp (file->line.buffer, PseudoTagPrefix, prefixLength) != 0) + break; + } + fsetpos (file->fp, &startOfLine); +} + +/* Allocates a tagFile, opens `filePath' for reading and parses its + * pseudo-tags. Returns NULL on failure. When `info' is not NULL it + * receives the errno value of the failure, or the pseudo-tag data and + * status.opened = 1 on success. + */ +static tagFile *initialize (const char *const filePath, tagFileInfo *const info) +{ + tagFile *result = (tagFile*) calloc ((size_t) 1, sizeof (tagFile)); + if (result != NULL) + { + growString (&result->line); + growString (&result->name); + result->fields.max = 20; + result->fields.list = (tagExtensionField*) calloc ( + result->fields.max, sizeof (tagExtensionField)); + /* fp stays NULL, so the failure path below runs, when one of the + * buffers could not be allocated + */ + if (result->line.buffer != NULL && result->name.buffer != NULL && + result->fields.list != NULL) + result->fp = fopen (filePath, "r"); + if (result->fp == NULL) + { + /* save errno first: free() is allowed to change it */ + const int error_number = errno; + free (result->line.buffer); + free (result->name.buffer); + free (result->fields.list); + free (result); + result = NULL; + if (info != NULL) + info->status.error_number = error_number; + } + else + { + fseek (result->fp, 0, SEEK_END); + result->size = ftell (result->fp); + rewind (result->fp); + readPseudoTags (result, info); + if (info != NULL) + info->status.opened = 1; + result->initialized = 1; + } + } + return result; +} + +/* Closes the stream and releases every buffer owned by `file', then the + * structure itself. + */ +static void terminate (tagFile *const file) +{ + fclose (file->fp); + + free (file->line.buffer); + free (file->name.buffer); + free (file->fields.list); + + if (file->program.author != NULL) + free (file->program.author); + if (file->program.name != NULL) + free (file->program.name); + if (file->program.url != NULL) + free (file->program.url); + if (file->program.version != NULL) + free (file->program.version); + if (file->search.name != NULL) + free (file->search.name); + + memset (file, 0, sizeof (tagFile)); + + free (file); +} + +static tagResult readNext (tagFile *const file, tagEntry *const entry) +{ + tagResult result; + if (file == NULL || ! file->initialized) + result = TagFailure; + else if (!
readTagLine (file)) + result = TagFailure; + else + { + if (entry != NULL) + parseTagLine (file, entry); + result = TagSuccess; + } + return result; +} + +/* Returns the value stored for `key' in `entry', or NULL when no field has + * that key. "kind" maps to entry->kind, "file" always yields the empty + * string, and any other key is looked up in the extension field list. + */ +static const char *readFieldValue ( + const tagEntry *const entry, const char *const key) +{ + const char *result = NULL; + int i; + if (strcmp (key, "kind") == 0) + result = entry->kind; + else if (strcmp (key, "file") == 0) + result = EmptyString; + else for (i = 0 ; i < entry->fields.count && result == NULL ; ++i) + if (strcmp (entry->fields.list [i].key, key) == 0) + result = entry->fields.list [i].value; + return result; +} + +/* Seeks to byte offset `pos' and reads a tag line from there. Unless `pos' + * is 0, the first line read is treated as a probable fragment and the line + * after it is the one kept. Returns 0 when the seek or a read fails. + */ +static int readTagLineSeek (tagFile *const file, const off_t pos) +{ + int result = 0; + if (fseek (file->fp, pos, SEEK_SET) == 0) + { + result = readTagLine (file); /* read probable partial line */ + if (pos > 0 && result) + result = readTagLine (file); /* read complete line */ + } + return result; +} + +/* Compares the name being searched for with the name of the line read + * last, honoring the ignore-case and partial-match settings of the search. + * Returns zero on a match, otherwise the non-zero result of the underlying + * string comparison. + */ +static int nameComparison (tagFile *const file) +{ + int result; + if (file->search.ignorecase) + { + if (file->search.partial) + result = strnuppercmp (file->search.name, file->name.buffer, + file->search.nameLength); + else + result = struppercmp (file->search.name, file->name.buffer); + } + else + { + if (file->search.partial) + result = strncmp (file->search.name, file->name.buffer, + file->search.nameLength); + else + result = strcmp (file->search.name, file->name.buffer); + } + return result; +} + +/* Steps backwards from the current line in jumps of JUMP_BACK bytes until + * the line read no longer matches, the start of the file is reached, or a + * read fails. + */ +static void findFirstNonMatchBefore (tagFile *const file) +{ +#define JUMP_BACK 512 + int more_lines; + int comp; + off_t start = file->pos; + off_t pos = start; + do + { + if (pos < (off_t) JUMP_BACK) + pos = 0; + else + pos = pos - JUMP_BACK; + more_lines = readTagLineSeek (file, pos); + comp = nameComparison (file); + } while (more_lines && comp == 0 && pos > 0 && pos < start); +} + +/* Backs up with findFirstNonMatchBefore(), then reads forward until a line + * matches or the position reaches the one the search started from. + */ +static tagResult findFirstMatchBefore (tagFile *const file) +{ + tagResult result = TagFailure; + int more_lines; + off_t start = file->pos; + findFirstNonMatchBefore (file); + do + { + more_lines =
readTagLine (file); + if (nameComparison (file) == 0) + result = TagSuccess; + } while (more_lines && result != TagSuccess && file->pos < start); + return result; +} + +/* Bisects the byte range from 0 to file->size for a line whose name matches + * the search. A match found at an offset other than 0 is handed to + * findFirstMatchBefore(), which decides the line that is reported. + */ +static tagResult findBinary (tagFile *const file) +{ + tagResult result = TagFailure; + off_t lower_limit = 0; + off_t upper_limit = file->size; + off_t last_pos = 0; + off_t pos = upper_limit / 2; + while (result != TagSuccess) + { + if (! readTagLineSeek (file, pos)) + { + /* in case we fell off end of file */ + result = findFirstMatchBefore (file); + break; + } + else if (pos == last_pos) + { + /* prevent infinite loop if we backed up to beginning of file */ + break; + } + else + { + const int comp = nameComparison (file); + last_pos = pos; + if (comp < 0) + { + /* searched name sorts before this line: continue in the lower half */ + upper_limit = pos; + pos = lower_limit + ((upper_limit - lower_limit) / 2); + } + else if (comp > 0) + { + /* searched name sorts after this line: continue in the upper half */ + lower_limit = pos; + pos = lower_limit + ((upper_limit - lower_limit) / 2); + } + else if (pos == 0) + result = TagSuccess; + else + result = findFirstMatchBefore (file); + } + } + return result; +} + +/* Reads forward from the current file position until a line matches the + * search or no further line can be read. + */ +static tagResult findSequential (tagFile *const file) +{ + tagResult result = TagFailure; + if (file->initialized) + { + while (result == TagFailure && readTagLine (file)) + { + if (nameComparison (file) == 0) + result = TagSuccess; + } + } + return result; +} + +/* Stores the search name and options in `file', refreshes file->size, + * rewinds the stream and then searches: by bisection when the sort order + * of the file suits the requested case sensitivity, sequentially otherwise. + */ +static tagResult find (tagFile *const file, tagEntry *const entry, + const char *const name, const int options) +{ + tagResult result; + if (file->search.name != NULL) + free (file->search.name); + file->search.name = duplicate (name); + file->search.nameLength = strlen (name); + file->search.partial = (options & TAG_PARTIALMATCH) != 0; + file->search.ignorecase = (options & TAG_IGNORECASE) != 0; + fseek (file->fp, 0, SEEK_END); + file->size = ftell (file->fp); + rewind (file->fp); + if ((file->sortMethod == TAG_SORTED && !file->search.ignorecase) || + (file->sortMethod == TAG_FOLDSORTED && file->search.ignorecase)) + { +#ifdef DEBUG + printf ("\n"); +#endif + result =
findBinary (file); + } + else + { +#ifdef DEBUG + printf ("\n"); +#endif + result = findSequential (file); + } + + if (result != TagSuccess) + file->search.pos = file->size; + else + { + file->search.pos = file->pos; + if (entry != NULL) + parseTagLine (file, entry); + } + return result; +} + +static tagResult findNext (tagFile *const file, tagEntry *const entry) +{ + tagResult result; + if ((file->sortMethod == TAG_SORTED && !file->search.ignorecase) || + (file->sortMethod == TAG_FOLDSORTED && file->search.ignorecase)) + { + result = tagsNext (file, entry); + if (result == TagSuccess && nameComparison (file) != 0) + result = TagFailure; + } + else + { + result = findSequential (file); + if (result == TagSuccess && entry != NULL) + parseTagLine (file, entry); + } + return result; +} + +/* +* EXTERNAL INTERFACE +*/ + +extern tagFile *tagsOpen (const char *const filePath, tagFileInfo *const info) +{ + return initialize (filePath, info); +} + +extern tagResult tagsSetSortType (tagFile *const file, const sortType type) +{ + tagResult result = TagFailure; + if (file != NULL && file->initialized) + { + file->sortMethod = type; + result = TagSuccess; + } + return result; +} + +extern tagResult tagsFirst (tagFile *const file, tagEntry *const entry) +{ + tagResult result = TagFailure; + if (file != NULL && file->initialized) + { + gotoFirstLogicalTag (file); + result = readNext (file, entry); + } + return result; +} + +extern tagResult tagsNext (tagFile *const file, tagEntry *const entry) +{ + tagResult result = TagFailure; + if (file != NULL && file->initialized) + result = readNext (file, entry); + return result; +} + +extern const char *tagsField (const tagEntry *const entry, const char *const key) +{ + const char *result = NULL; + if (entry != NULL) + result = readFieldValue (entry, key); + return result; +} + +extern tagResult tagsFind (tagFile *const file, tagEntry *const entry, + const char *const name, const int options) +{ + tagResult result = TagFailure; + if 
(file != NULL && file->initialized) + result = find (file, entry, name, options); + return result; +} + +extern tagResult tagsFindNext (tagFile *const file, tagEntry *const entry) +{ + tagResult result = TagFailure; + if (file != NULL && file->initialized) + result = findNext (file, entry); + return result; +} + +extern tagResult tagsClose (tagFile *const file) +{ + tagResult result = TagFailure; + if (file != NULL && file->initialized) + { + terminate (file); + result = TagSuccess; + } + return result; +} + +/* +* TEST FRAMEWORK +*/ + +#ifdef READTAGS_MAIN + +static const char *TagFileName = "tags"; +static const char *ProgramName; +static int extensionFields; +static int SortOverride; +static sortType SortMethod; + +static void printTag (const tagEntry *entry) +{ + int i; + int first = 1; + const char* separator = ";\""; + const char* const empty = ""; +/* "sep" returns a value only the first time it is evaluated */ +#define sep (first ? (first = 0, separator) : empty) + printf ("%s\t%s\t%s", + entry->name, entry->file, entry->address.pattern); + if (extensionFields) + { + if (entry->kind != NULL && entry->kind [0] != '\0') + printf ("%s\tkind:%s", sep, entry->kind); + if (entry->fileScope) + printf ("%s\tfile:", sep); +#if 0 + if (entry->address.lineNumber > 0) + printf ("%s\tline:%lu", sep, entry->address.lineNumber); +#endif + for (i = 0 ; i < entry->fields.count ; ++i) + printf ("%s\t%s:%s", sep, entry->fields.list [i].key, + entry->fields.list [i].value); + } + putchar ('\n'); +#undef sep +} + +static void findTag (const char *const name, const int options) +{ + tagFileInfo info; + tagEntry entry; + tagFile *const file = tagsOpen (TagFileName, &info); + if (file == NULL) + { + fprintf (stderr, "%s: cannot open tag file: %s: %s\n", + ProgramName, strerror (info.status.error_number), name); + exit (1); + } + else + { + if (SortOverride) + tagsSetSortType (file, SortMethod); + if (tagsFind (file, &entry, name, options) == TagSuccess) + { + do + { + printTag 
(&entry); + } while (tagsFindNext (file, &entry) == TagSuccess); + } + tagsClose (file); + } +} + +static void listTags (void) +{ + tagFileInfo info; + tagEntry entry; + tagFile *const file = tagsOpen (TagFileName, &info); + if (file == NULL) + { + fprintf (stderr, "%s: cannot open tag file: %s: %s\n", + ProgramName, strerror (info.status.error_number), TagFileName); + exit (1); + } + else + { + while (tagsNext (file, &entry) == TagSuccess) + printTag (&entry); + tagsClose (file); + } +} + +const char *const Usage = + "Find tag file entries matching specified names.\n\n" + "Usage: %s [-ilp] [-s[0|1]] [-t file] [name(s)]\n\n" + "Options:\n" + " -e Include extension fields in output.\n" + " -i Perform case-insensitive matching.\n" + " -l List all tags.\n" + " -p Perform partial matching.\n" + " -s[0|1|2] Override sort detection of tag file.\n" + " -t file Use specified tag file (default: \"tags\").\n" + "Note that options are acted upon as encountered, so order is significant.\n"; + +extern int main (int argc, char **argv) +{ + int options = 0; + int actionSupplied = 0; + int i; + ProgramName = argv [0]; + if (argc == 1) + { + fprintf (stderr, Usage, ProgramName); + exit (1); + } + for (i = 1 ; i < argc ; ++i) + { + const char *const arg = argv [i]; + if (arg [0] != '-') + { + findTag (arg, options); + actionSupplied = 1; + } + else + { + size_t j; + for (j = 1 ; arg [j] != '\0' ; ++j) + { + switch (arg [j]) + { + case 'e': extensionFields = 1; break; + case 'i': options |= TAG_IGNORECASE; break; + case 'p': options |= TAG_PARTIALMATCH; break; + case 'l': listTags (); actionSupplied = 1; break; + + case 't': + if (arg [j+1] != '\0') + { + TagFileName = arg + j + 1; + j += strlen (TagFileName); + } + else if (i + 1 < argc) + TagFileName = argv [++i]; + else + { + fprintf (stderr, Usage, ProgramName); + exit (1); + } + break; + case 's': + SortOverride = 1; + ++j; + if (arg [j] == '\0') + SortMethod = TAG_SORTED; + else if (strchr ("012", arg[j]) != NULL) + SortMethod 
= (sortType) (arg[j] - '0'); + else + { + fprintf (stderr, Usage, ProgramName); + exit (1); + } + break; + default: + fprintf (stderr, "%s: unknown option: %c\n", + ProgramName, arg[j]); + exit (1); + break; + } + } + } + } + if (! actionSupplied) + { + fprintf (stderr, + "%s: no action specified: specify tag name(s) or -l option\n", + ProgramName); + exit (1); + } + return 0; +} + +#endif + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/third_party/ctags/readtags.h b/third_party/ctags/readtags.h new file mode 100644 index 000000000..14a789069 --- /dev/null +++ b/third_party/ctags/readtags.h @@ -0,0 +1,253 @@ +// clang-format off +/* +* $Id: readtags.h 443 2006-05-30 04:37:13Z darren $ +* +* Copyright (c) 1996-2003, Darren Hiebert +* +* This source code is released for the public domain. +* +* This file defines the public interface for looking up tag entries in tag +* files. +* +* The functions defined in this interface are intended to provide tag file +* support to a software tool. The tag lookups provided are sufficiently fast +* enough to permit opening a sorted tag file, searching for a matching tag, +* then closing the tag file each time a tag is looked up (search times are +* on the order of hundreths of a second, even for huge tag files). This is +* the recommended use of this library for most tool applications. Adhering +* to this approach permits a user to regenerate a tag file at will without +* the tool needing to detect and resynchronize with changes to the tag file. +* Even for an unsorted 24MB tag file, tag searches take about one second. 
/*
*  MACROS
*/

/* Options for tagsSetSortType() */
typedef enum {
	TAG_UNSORTED, TAG_SORTED, TAG_FOLDSORTED
} sortType ;

/* Options for tagsFind() */
#define TAG_FULLMATCH     0x0
#define TAG_PARTIALMATCH  0x1

#define TAG_OBSERVECASE   0x0
#define TAG_IGNORECASE    0x2

/*
*  DATA DECLARATIONS
*/

typedef enum { TagFailure = 0, TagSuccess = 1 } tagResult;

struct sTagFile;

typedef struct sTagFile tagFile;

/* This structure contains information about the tag file. */
typedef struct {

	struct {
			/* was the tag file successfully opened? */
		int opened;

			/* errno value when 'opened' is false */
		int error_number;
	} status;

		/* information about the structure of the tag file */
	struct {
				/* format of tag file (1 = original, 2 = extended) */
			short format;

				/* how is the tag file sorted? */
			sortType sort;
	} file;


		/* information about the program which created this tag file */
	struct {
			/* name of author of generating program (may be null) */
		const char *author;

			/* name of program (may be null) */
		const char *name;

			/* URL of distribution (may be null) */
		const char *url;

			/* program version (may be null) */
		const char *version;
	} program;

} tagFileInfo;

/* This structure contains information about an extension field for a tag.
 * These exist at the end of the tag in the form "key:value".
 */
typedef struct {

		/* the key of the extension field */
	const char *key;

		/* the value of the extension field (may be an empty string) */
	const char *value;

} tagExtensionField;
*/ +typedef struct { + + /* name of tag */ + const char *name; + + /* path of source file containing definition of tag */ + const char *file; + + /* address for locating tag in source file */ + struct { + /* pattern for locating source line + * (may be NULL if not present) */ + const char *pattern; + + /* line number in source file of tag definition + * (may be zero if not known) */ + unsigned long lineNumber; + } address; + + /* kind of tag (may by name, character, or NULL if not known) */ + const char *kind; + + /* is tag of file-limited scope? */ + short fileScope; + + /* miscellaneous extension fields */ + struct { + /* number of entries in `list' */ + unsigned short count; + + /* list of key value pairs */ + tagExtensionField *list; + } fields; + +} tagEntry; + + +/* +* FUNCTION PROTOTYPES +*/ + +/* +* This function must be called before calling other functions in this +* library. It is passed the path to the tag file to read and a (possibly +* null) pointer to a structure which, if not null, will be populated with +* information about the tag file. If successful, the function will return a +* handle which must be supplied to other calls to read information from the +* tag file, and info.status.opened will be set to true. If unsuccessful, +* info.status.opened will be set to false and info.status.error_number will +* be set to the errno value representing the system error preventing the tag +* file from being successfully opened. +*/ +extern tagFile *tagsOpen (const char *const filePath, tagFileInfo *const info); + +/* +* This function allows the client to override the normal automatic detection +* of how a tag file is sorted. Permissible values for `type' are +* TAG_UNSORTED, TAG_SORTED, TAG_FOLDSORTED. Tag files in the new extended +* format contain a key indicating whether or not they are sorted. 
However, +* tag files in the original format do not contain such a key even when +* sorted, preventing this library from taking advantage of fast binary +* lookups. If the client knows that such an unmarked tag file is indeed +* sorted (or not), it can override the automatic detection. Note that +* incorrect lookup results will result if a tag file is marked as sorted when +* it actually is not. The function will return TagSuccess if called on an +* open tag file or TagFailure if not. +*/ +extern tagResult tagsSetSortType (tagFile *const file, const sortType type); + +/* +* Reads the first tag in the file, if any. It is passed the handle to an +* opened tag file and a (possibly null) pointer to a structure which, if not +* null, will be populated with information about the first tag file entry. +* The function will return TagSuccess another tag entry is found, or +* TagFailure if not (i.e. it reached end of file). +*/ +extern tagResult tagsFirst (tagFile *const file, tagEntry *const entry); + +/* +* Step to the next tag in the file, if any. It is passed the handle to an +* opened tag file and a (possibly null) pointer to a structure which, if not +* null, will be populated with information about the next tag file entry. The +* function will return TagSuccess another tag entry is found, or TagFailure +* if not (i.e. it reached end of file). It will always read the first tag in +* the file immediately after calling tagsOpen(). +*/ +extern tagResult tagsNext (tagFile *const file, tagEntry *const entry); + +/* +* Retrieve the value associated with the extension field for a specified key. +* It is passed a pointer to a structure already populated with values by a +* previous call to tagsNext(), tagsFind(), or tagsFindNext(), and a string +* containing the key of the desired extension field. If no such field of the +* specified key exists, the function will return null. 
+*/ +extern const char *tagsField (const tagEntry *const entry, const char *const key); + +/* +* Find the first tag matching `name'. The structure pointed to by `entry' +* will be populated with information about the tag file entry. If a tag file +* is sorted using the C locale, a binary search algorithm is used to search +* the tag file, resulting in very fast tag lookups, even in huge tag files. +* Various options controlling the matches can be combined by bit-wise or-ing +* certain values together. The available values are: +* +* TAG_PARTIALMATCH +* Tags whose leading characters match `name' will qualify. +* +* TAG_FULLMATCH +* Only tags whose full lengths match `name' will qualify. +* +* TAG_IGNORECASE +* Matching will be performed in a case-insenstive manner. Note that +* this disables binary searches of the tag file. +* +* TAG_OBSERVECASE +* Matching will be performed in a case-senstive manner. Note that +* this enables binary searches of the tag file. +* +* The function will return TagSuccess if a tag matching the name is found, or +* TagFailure if not. +*/ +extern tagResult tagsFind (tagFile *const file, tagEntry *const entry, const char *const name, const int options); + +/* +* Find the next tag matching the name and options supplied to the most recent +* call to tagsFind() for the same tag file. The structure pointed to by +* `entry' will be populated with information about the tag file entry. The +* function will return TagSuccess if another tag matching the name is found, +* or TagFailure if not. +*/ +extern tagResult tagsFindNext (tagFile *const file, tagEntry *const entry); + +/* +* Call tagsTerminate() at completion of reading the tag file, which will +* close the file and free any internal memory allocated. The function will +* return TagFailure is no file is currently open, TagSuccess otherwise. 
+*/ +extern tagResult tagsClose (tagFile *const file); + +#ifdef __cplusplus +}; +#endif + +#endif + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/third_party/ctags/rexx.c b/third_party/ctags/rexx.c new file mode 100644 index 000000000..d1fa03c67 --- /dev/null +++ b/third_party/ctags/rexx.c @@ -0,0 +1,40 @@ +// clang-format off +/* +* $Id: rexx.c 443 2006-05-30 04:37:13Z darren $ +* +* Copyright (c) 2001-2003, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains functions for generating tags for the REXX language +* (http://www.rexxla.org, http://www2.hursley.ibm.com/rexx). +*/ + +/* +* INCLUDE FILES +*/ +#include "third_party/ctags/general.h" /* always include first */ +#include "third_party/ctags/parse.h" /* always include */ + +/* +* FUNCTION DEFINITIONS +*/ + +static void installRexxRegex (const langType language) +{ + addTagRegex (language, "^([A-Za-z0-9@#$\\.!?_]+)[ \t]*:", + "\\1", "s,subroutine,subroutines", NULL); +} + +extern parserDefinition* RexxParser (void) +{ + static const char *const extensions [] = { "cmd", "rexx", "rx", NULL }; + parserDefinition* const def = parserNew ("REXX"); + def->extensions = extensions; + def->initialize = installRexxRegex; + def->regex = TRUE; + return def; +} + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/third_party/ctags/routines.c b/third_party/ctags/routines.c new file mode 100644 index 000000000..5710a587e --- /dev/null +++ b/third_party/ctags/routines.c @@ -0,0 +1,533 @@ +/* + * $Id: routines.c 536 2007-06-02 06:09:00Z elliotth $ + * + * Copyright (c) 2002-2003, Darren Hiebert + * + * This source code is released for free distribution under the terms of the + * GNU General Public License. + * + * This module contains a lose assortment of shared functions. 
+ */ +#include "third_party/ctags/general.h" +/**/ +#include "libc/calls/calls.h" +#include "libc/calls/struct/stat.h" +#include "libc/dce.h" +#include "libc/errno.h" +#include "libc/stdio/stdio.h" +#include "libc/str/str.h" +#include "libc/sysv/consts/s.h" +#include "third_party/ctags/config.h" +#include "third_party/ctags/debug.h" +#include "third_party/ctags/routines.h" +// clang-format off + +/* + * Miscellaneous macros + */ +#define selected(var,feature) (((int)(var) & (int)(feature)) == (int)feature) + +char *CurrentDirectory; + +static const char *ExecutableProgram; +static const char *ExecutableName; + +/* +* FUNCTION DEFINITIONS +*/ + +extern void freeRoutineResources (void) +{ + if (CurrentDirectory != NULL) + eFree (CurrentDirectory); +} + +extern void setExecutableName (const char *const path) +{ + ExecutableProgram = path; + ExecutableName = baseFilename (path); +#ifdef VAXC +{ + /* remove filetype from executable name */ + char *p = strrchr (ExecutableName, '.'); + if (p != NULL) + *p = '\0'; +} +#endif +} + +extern const char *getExecutableName (void) +{ + return ExecutableName; +} + +extern const char *getExecutablePath (void) +{ + return ExecutableProgram; +} + +extern void error ( + const errorSelection selection, const char *const format, ...) +{ + va_list ap; + + va_start (ap, format); + fprintf (errout, "%s: %s", getExecutableName (), + selected (selection, WARNING) ? 
"Warning: " : ""); + vfprintf (errout, format, ap); + if (selected (selection, PERROR)) + fprintf (errout, " : %s", strerror (errno)); + fputs ("\n", errout); + va_end (ap); + if (selected (selection, FATAL)) + exit (1); +} + +/* + * Memory allocation functions + */ + +extern void *eMalloc (const size_t size) +{ + void *buffer = malloc (size); + + if (buffer == NULL) + error (FATAL, "out of memory"); + + return buffer; +} + +extern void *eCalloc (const size_t count, const size_t size) +{ + void *buffer = calloc (count, size); + + if (buffer == NULL) + error (FATAL, "out of memory"); + + return buffer; +} + +extern void *eRealloc (void *const ptr, const size_t size) +{ + void *buffer; + if (ptr == NULL) + buffer = eMalloc (size); + else + { + buffer = realloc (ptr, size); + if (buffer == NULL) + error (FATAL, "out of memory"); + } + return buffer; +} + +extern void eFree (void *const ptr) +{ + Assert (ptr != NULL); + free (ptr); +} + +/* + * String manipulation functions + */ + +/* + * Compare two strings, ignoring case. + * Return 0 for match, < 0 for smaller, > 0 for bigger + * Make sure case is folded to uppercase in comparison (like for 'sort -f') + * This makes a difference when one of the chars lies between upper and lower + * ie. one of the chars [ \ ] ^ _ ` for ascii. (The '_' in particular !) 
/*
 *  String manipulation functions
 */

/*
 * Compare two strings, ignoring case.
 * Return 0 for match, < 0 for smaller, > 0 for bigger.
 * Case is folded to uppercase for the comparison (like 'sort -f'); this
 * makes a difference when one of the characters lies between the upper and
 * lower case letters, i.e. one of [ \ ] ^ _ ` in ASCII ('_' in particular).
 * Characters are converted to unsigned char before being handed to
 * toupper(), which is undefined for negative values other than EOF.
 */
extern int struppercmp (const char *s1, const char *s2)
{
	int result;
	do
	{
		result = toupper ((unsigned char) *s1) - toupper ((unsigned char) *s2);
	} while (result == 0 && *s1++ != '\0' && *s2++ != '\0');
	return result;
}

/*
 * Like struppercmp(), but compares at most `n' characters.  With n == 0
 * nothing is compared and the strings are reported as equal.
 */
extern int strnuppercmp (const char *s1, const char *s2, size_t n)
{
	int result = 0;
	while (n-- > 0)
	{
		result = toupper ((unsigned char) *s1) - toupper ((unsigned char) *s2);
		/* equal characters that are both NUL end the comparison as well */
		if (result != 0 || *s1 == '\0')
			break;
		++s1;
		++s2;
	}
	return result;
}

/* Convert `str' to lower case in place. */
extern void toLowerString (char* str)
{
	for ( ; *str != '\0' ; ++str)
		*str = (char) tolower ((unsigned char) *str);
}

/* Convert `str' to upper case in place. */
extern void toUpperString (char* str)
{
	for ( ; *str != '\0' ; ++str)
		*str = (char) toupper ((unsigned char) *str);
}
+ */ +extern char* newUpperString (const char* str) +{ + char* const result = xMalloc (strlen (str) + 1, char); + int i = 0; + do + result [i] = toupper ((int) str [i]); + while (str [i++] != '\0'); + return result; +} + +/* + * File system functions + */ + +extern void setCurrentDirectory (void) +{ + char* buf; + if (CurrentDirectory == NULL) + CurrentDirectory = xMalloc ((size_t) (PATH_MAX + 1), char); + buf = getcwd (CurrentDirectory, PATH_MAX); + if (buf == NULL) + perror (""); + if (CurrentDirectory [strlen (CurrentDirectory) - (size_t) 1] != + PATH_SEPARATOR) + { + sprintf (CurrentDirectory + strlen (CurrentDirectory), "%c", + OUTPUT_PATH_SEPARATOR); + } +} + +/* For caching of stat() calls */ +extern fileStatus *eStat (const char *const fileName) +{ + struct stat status; + static fileStatus file; + if (file.name == NULL || strcmp (fileName, file.name) != 0) + { + eStatFree (&file); + file.name = eStrdup (fileName); + if (lstat (file.name, &status) != 0) + file.exists = FALSE; + else + { + file.isSymbolicLink = (boolean) S_ISLNK (status.st_mode); + if (file.isSymbolicLink && stat (file.name, &status) != 0) + file.exists = FALSE; + else + { + file.exists = TRUE; + file.isDirectory = (boolean) S_ISDIR (status.st_mode); + file.isNormalFile = (boolean) (S_ISREG (status.st_mode)); + file.isExecutable = (boolean) ((status.st_mode & + (S_IXUSR | S_IXGRP | S_IXOTH)) != 0); + file.isSetuid = (boolean) ((status.st_mode & S_ISUID) != 0); + file.size = status.st_size; + } + } + } + return &file; +} + +extern void eStatFree (fileStatus *status) +{ + if (status->name != NULL) + { + eFree (status->name); + status->name = NULL; + } +} + +extern boolean doesFileExist (const char *const fileName) +{ + fileStatus *status = eStat (fileName); + return status->exists; +} + +extern boolean isRecursiveLink (const char* const dirName) +{ + boolean result = FALSE; + fileStatus *status = eStat (dirName); + if (status->isSymbolicLink) + { + char* const path = absoluteFilename (dirName); 
+ while (path [strlen (path) - 1] == PATH_SEPARATOR) + path [strlen (path) - 1] = '\0'; + while (! result && strlen (path) > (size_t) 1) + { + char *const separator = strrchr (path, PATH_SEPARATOR); + if (separator == NULL) + break; + else if (separator == path) /* backed up to root directory */ + *(separator + 1) = '\0'; + else + *separator = '\0'; + result = isSameFile (path, dirName); + } + eFree (path); + } + return result; +} + +/* + * Pathname manipulation (O/S dependent!!!) + */ + +static boolean isPathSeparator (const int c) +{ + boolean result; + result = (boolean) (c == PATH_SEPARATOR); + return result; +} + +extern boolean isSameFile (const char *const name1, const char *const name2) +{ + boolean result = FALSE; + struct stat stat1, stat2; + if (stat (name1, &stat1) == 0 && stat (name2, &stat2) == 0) + result = (boolean) (stat1.st_ino == stat2.st_ino); + return result; +} + +extern const char *baseFilename (const char *const filePath) +{ + const char *tail = strrchr (filePath, PATH_SEPARATOR); + if (tail == NULL) + tail = filePath; + else + ++tail; /* step past last delimiter */ + + return tail; +} + +extern const char *fileExtension (const char *const fileName) +{ + const char *extension; + const char *pDelimiter = NULL; + const char *const base = baseFilename (fileName); + if (pDelimiter == NULL) + pDelimiter = strrchr (base, '.'); + + if (pDelimiter == NULL) + extension = ""; + else + extension = pDelimiter + 1; /* skip to first char of extension */ + + return extension; +} + +extern boolean isAbsolutePath (const char *const path) +{ + boolean result = FALSE; + result = isPathSeparator (path [0]); + return result; +} + +extern vString *combinePathAndFile ( + const char *const path, const char *const file) +{ + vString *const filePath = vStringNew (); + const int lastChar = path [strlen (path) - 1]; + boolean terminated = isPathSeparator (lastChar); + + vStringCopyS (filePath, path); + if (! 
terminated) + { + vStringPut (filePath, OUTPUT_PATH_SEPARATOR); + vStringTerminate (filePath); + } + vStringCatS (filePath, file); + + return filePath; +} + +/* Return a newly-allocated string whose contents concatenate those of + * s1, s2, s3. + * Routine adapted from Gnu etags. + */ +static char* concat (const char *s1, const char *s2, const char *s3) +{ + int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3); + char *result = xMalloc (len1 + len2 + len3 + 1, char); + + strcpy (result, s1); + strcpy (result + len1, s2); + strcpy (result + len1 + len2, s3); + result [len1 + len2 + len3] = '\0'; + + return result; +} + +/* Return a newly allocated string containing the absolute file name of FILE + * given CWD (which should end with a slash). + * Routine adapted from Gnu etags. + */ +extern char* absoluteFilename (const char *file) +{ + char *slashp, *cp; + char *res = NULL; + if (isAbsolutePath (file)) + { + res = eStrdup (file); + } + else + res = concat (CurrentDirectory, file, ""); + + /* Delete the "/dirname/.." and "/." substrings. */ + slashp = strchr (res, PATH_SEPARATOR); + while (slashp != NULL && slashp [0] != '\0') + { + if (slashp[1] == '.') + { + if (slashp [2] == '.' && + (slashp [3] == PATH_SEPARATOR || slashp [3] == '\0')) + { + cp = slashp; + do + cp--; + while (cp >= res && ! isAbsolutePath (cp)); + if (cp < res) + cp = slashp;/* the absolute name begins with "/.." */ + memmove (cp, slashp + 3, strlen(slashp + 3) + 1); + slashp = cp; + continue; + } + else if (slashp [2] == PATH_SEPARATOR || slashp [2] == '\0') + { + memmove (slashp, slashp + 2, strlen(slashp + 2) + 1); + continue; + } + } + slashp = strchr (slashp + 1, PATH_SEPARATOR); + } + + if (res [0] == '\0') + return eStrdup ("/"); + else + { + return res; + } +} + +/* Return a newly allocated string containing the absolute file name of dir + * where `file' resides given `CurrentDirectory'. + * Routine adapted from Gnu etags. 
+ */ +extern char* absoluteDirname (char *file) +{ + char *slashp, *res; + char save; + slashp = strrchr (file, PATH_SEPARATOR); + if (slashp == NULL) + res = eStrdup (CurrentDirectory); + else + { + save = slashp [1]; + slashp [1] = '\0'; + res = absoluteFilename (file); + slashp [1] = save; + } + return res; +} + +/* Return a newly allocated string containing the file name of FILE relative + * to the absolute directory DIR (which should end with a slash). + * Routine adapted from Gnu etags. + */ +extern char* relativeFilename (const char *file, const char *dir) +{ + const char *fp, *dp; + char *absdir, *res; + int i; + + /* Find the common root of file and dir (with a trailing slash). */ + absdir = absoluteFilename (file); + fp = absdir; + dp = dir; + while (*fp++ == *dp++) + continue; + fp--; + dp--; /* back to the first differing char */ + do + { /* look at the equal chars until path sep */ + if (fp == absdir) + return absdir; /* first char differs, give up */ + fp--; + dp--; + } while (*fp != PATH_SEPARATOR); + + /* Build a sequence of "../" strings for the resulting relative file name. + */ + i = 0; + while ((dp = strchr (dp + 1, PATH_SEPARATOR)) != NULL) + i += 1; + res = xMalloc (3 * i + strlen (fp + 1) + 1, char); + res [0] = '\0'; + while (i-- > 0) + strcat (res, "../"); + + /* Add the file name relative to the common root of file and dir. */ + strcat (res, fp + 1); + free (absdir); + + return res; +} + +extern FILE *tempFile (const char *const mode, char **const pName) +{ + char *name; + FILE *fp; + int fd; + const char *const pattern = "tags.XXXXXX"; + const char *tmpdir = NULL; + fileStatus *file = eStat (ExecutableProgram); + if (! 
file->isSetuid) + tmpdir = getenv ("TMPDIR"); + if (tmpdir == NULL) + tmpdir = TMPDIR; + name = xMalloc (strlen (tmpdir) + 1 + strlen (pattern) + 1, char); + sprintf (name, "%s%c%s", tmpdir, OUTPUT_PATH_SEPARATOR, pattern); + fd = mkstemp (name); + eStatFree (file); + if (fd == -1) + error (FATAL | PERROR, "cannot open temporary file"); + fp = fdopen (fd, mode); + if (fp == NULL) + error (FATAL | PERROR, "cannot open temporary file"); + DebugStatement ( + debugPrintf (DEBUG_STATUS, "opened temporary file %s\n", name); ) + Assert (*pName == NULL); + *pName = name; + return fp; +} + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/third_party/ctags/routines.h b/third_party/ctags/routines.h new file mode 100644 index 000000000..e4d0d7a8c --- /dev/null +++ b/third_party/ctags/routines.h @@ -0,0 +1,137 @@ +// clang-format off +/* +* $Id: routines.h 536 2007-06-02 06:09:00Z elliotth $ +* +* Copyright (c) 2002, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. 
+* +* External interface to routines.c +*/ +#ifndef _ROUTINES_H +#define _ROUTINES_H + +/* +* INCLUDE FILES +*/ +#include "third_party/ctags/vstring.h" +#include "libc/stdio/stdio.h" +#include "third_party/ctags/general.h" /* must always come first */ + +/* +* MACROS +*/ +#define xMalloc(n,Type) (Type *)eMalloc((size_t)(n) * sizeof (Type)) +#define xCalloc(n,Type) (Type *)eCalloc((size_t)(n), sizeof (Type)) +#define xRealloc(p,n,Type) (Type *)eRealloc((p), (n) * sizeof (Type)) + +/* + * Portability macros + */ +#ifndef PATH_SEPARATOR +# if defined (MSDOS_STYLE_PATH) +# define PATH_SEPARATOR '\\' +# elif defined (QDOS) +# define PATH_SEPARATOR '_' +# else +# define PATH_SEPARATOR '/' +# endif +#endif + +#if defined (MSDOS_STYLE_PATH) && defined (UNIX_PATH_SEPARATOR) +# define OUTPUT_PATH_SEPARATOR '/' +#else +# define OUTPUT_PATH_SEPARATOR PATH_SEPARATOR +#endif + +/* +* DATA DECLARATIONS +*/ +#if defined (MSDOS_STYLE_PATH) || defined (VMS) +extern const char *const PathDelimiters; +#endif +extern char *CurrentDirectory; +typedef int errorSelection; +enum eErrorTypes { FATAL = 1, WARNING = 2, PERROR = 4 }; + +typedef struct { + /* Name of file for which status is valid */ + char* name; + + /* Does file exist? If not, members below do not contain valid data. */ + boolean exists; + + /* is file path a symbolic link to another file? */ + boolean isSymbolicLink; + + /* Is file (pointed to) a directory? */ + boolean isDirectory; + + /* Is file (pointed to) a normal file? */ + boolean isNormalFile; + + /* Is file (pointed to) executable? */ + boolean isExecutable; + + /* Is file (pointed to) setuid? 
*/ + boolean isSetuid; + + /* Size of file (pointed to) */ + unsigned long size; +} fileStatus; + +/* +* FUNCTION PROTOTYPES +*/ +extern void freeRoutineResources (void); +extern void setExecutableName (const char *const path); +extern const char *getExecutableName (void); +extern const char *getExecutablePath (void); +extern void error (const errorSelection selection, const char *const format, ...) __printf (2, 3); + +/* Memory allocation functions */ +#ifdef NEED_PROTO_MALLOC +extern void *malloc (size_t); +extern void *realloc (void *ptr, size_t); +#endif +extern void *eMalloc (const size_t size); +extern void *eCalloc (const size_t count, const size_t size); +extern void *eRealloc (void *const ptr, const size_t size); +extern void eFree (void *const ptr); + +/* String manipulation functions */ +extern int struppercmp (const char *s1, const char *s2); +extern int strnuppercmp (const char *s1, const char *s2, size_t n); +#ifndef HAVE_STRSTR +extern char* strstr (const char *str, const char *substr); +#endif +extern char* eStrdup (const char* str); +extern void toLowerString (char* str); +extern void toUpperString (char* str); +extern char* newLowerString (const char* str); +extern char* newUpperString (const char* str); + +/* File system functions */ +extern void setCurrentDirectory (void); +extern fileStatus *eStat (const char *const fileName); +extern void eStatFree (fileStatus *status); +extern boolean doesFileExist (const char *const fileName); +extern boolean isRecursiveLink (const char* const dirName); +extern boolean isSameFile (const char *const name1, const char *const name2); +#if defined(NEED_PROTO_FGETPOS) +extern int fgetpos (FILE *stream, fpos_t *pos); +extern int fsetpos (FILE *stream, fpos_t *pos); +#endif +extern const char *baseFilename (const char *const filePath); +extern const char *fileExtension (const char *const fileName); +extern boolean isAbsolutePath (const char *const path); +extern vString *combinePathAndFile (const char *const path, 
const char *const file); +extern char* absoluteFilename (const char *file); +extern char* absoluteDirname (char *file); +extern char* relativeFilename (const char *file, const char *dir); +extern FILE *tempFile (const char *const mode, char **const pName); + +#endif /* _ROUTINES_H */ + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/third_party/ctags/ruby.c b/third_party/ctags/ruby.c new file mode 100644 index 000000000..d0409c8d5 --- /dev/null +++ b/third_party/ctags/ruby.c @@ -0,0 +1,410 @@ +// clang-format off +/* +* $Id: ruby.c 571 2007-06-24 23:32:14Z elliotth $ +* +* Copyright (c) 2000-2001, Thaddeus Covert +* Copyright (c) 2002 Matthias Veit +* Copyright (c) 2004 Elliott Hughes +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains functions for generating tags for Ruby language +* files. +*/ + +/* +* INCLUDE FILES +*/ +#include "third_party/ctags/general.h" /* must always come first */ + +#include "libc/mem/alg.h" +#include "libc/str/str.h" + +#include "third_party/ctags/entry.h" +#include "third_party/ctags/parse.h" +#include "third_party/ctags/read.h" +#include "third_party/ctags/vstring.h" + +/* +* DATA DECLARATIONS +*/ +typedef enum { + K_UNDEFINED = -1, K_CLASS, K_METHOD, K_MODULE, K_SINGLETON +} rubyKind; + +/* +* DATA DEFINITIONS +*/ +static kindOption RubyKinds [] = { + { TRUE, 'c', "class", "classes" }, + { TRUE, 'f', "method", "methods" }, + { TRUE, 'm', "module", "modules" }, + { TRUE, 'F', "singleton method", "singleton methods" } +}; + +static stringList* nesting = 0; + +/* +* FUNCTION DEFINITIONS +*/ + +/* +* Returns a string describing the scope in 'list'. +* We record the current scope as a list of entered scopes. +* Scopes corresponding to 'if' statements and the like are +* represented by empty strings. Scopes corresponding to +* modules and classes are represented by the name of the +* module or class. 
+*/ +static vString* stringListToScope (const stringList* list) +{ + unsigned int i; + unsigned int chunks_output = 0; + vString* result = vStringNew (); + const unsigned int max = stringListCount (list); + for (i = 0; i < max; ++i) + { + vString* chunk = stringListItem (list, i); + if (vStringLength (chunk) > 0) + { + vStringCatS (result, (chunks_output++ > 0) ? "." : ""); + vStringCatS (result, vStringValue (chunk)); + } + } + return result; +} + +/* +* Attempts to advance 's' past 'literal'. +* Returns TRUE if it did, FALSE (and leaves 's' where +* it was) otherwise. +*/ +static boolean canMatch (const unsigned char** s, const char* literal) +{ + const int literal_length = strlen (literal); + const unsigned char next_char = *(*s + literal_length); + if (strncmp ((const char*) *s, literal, literal_length) != 0) + { + return FALSE; + } + /* Additionally check that we're at the end of a token. */ + if ( ! (next_char == 0 || isspace (next_char) || next_char == '(')) + { + return FALSE; + } + *s += literal_length; + return TRUE; +} + +/* +* Attempts to advance 'cp' past a Ruby operator method name. Returns +* TRUE if successful (and copies the name into 'name'), FALSE otherwise. +*/ +static boolean parseRubyOperator (vString* name, const unsigned char** cp) +{ + static const char* RUBY_OPERATORS[] = { + "[]", "[]=", + "**", + "!", "~", "+@", "-@", + "*", "/", "%", + "+", "-", + ">>", "<<", + "&", + "^", "|", + "<=", "<", ">", ">=", + "<=>", "==", "===", "!=", "=~", "!~", + "`", + 0 + }; + int i; + for (i = 0; RUBY_OPERATORS[i] != 0; ++i) + { + if (canMatch (cp, RUBY_OPERATORS[i])) + { + vStringCatS (name, RUBY_OPERATORS[i]); + return TRUE; + } + } + return FALSE; +} + +/* +* Emits a tag for the given 'name' of kind 'kind' at the current nesting. 
+*/ +static void emitRubyTag (vString* name, rubyKind kind) +{ + tagEntryInfo tag; + vString* scope; + + vStringTerminate (name); + scope = stringListToScope (nesting); + + initTagEntry (&tag, vStringValue (name)); + if (vStringLength (scope) > 0) { + tag.extensionFields.scope [0] = "class"; + tag.extensionFields.scope [1] = vStringValue (scope); + } + tag.kindName = RubyKinds [kind].name; + tag.kind = RubyKinds [kind].letter; + makeTagEntry (&tag); + + stringListAdd (nesting, vStringNewCopy (name)); + + vStringClear (name); + vStringDelete (scope); +} + +/* Tests whether 'ch' is a character in 'list'. */ +static boolean charIsIn (char ch, const char* list) +{ + return (strchr (list, ch) != 0); +} + +/* Advances 'cp' over leading whitespace. */ +static void skipWhitespace (const unsigned char** cp) +{ + while (isspace (**cp)) + { + ++*cp; + } +} + +/* +* Copies the characters forming an identifier from *cp into +* name, leaving *cp pointing to the character after the identifier. +*/ +static rubyKind parseIdentifier ( + const unsigned char** cp, vString* name, rubyKind kind) +{ + /* Method names are slightly different to class and variable names. + * A method name may optionally end with a question mark, exclamation + * point or equals sign. These are all part of the name. + * A method name may also contain a period if it's a singleton method. + */ + const char* also_ok = (kind == K_METHOD) ? "_.?!=" : "_"; + + skipWhitespace (cp); + + /* Check for an anonymous (singleton) class such as "class << HTTP". */ + if (kind == K_CLASS && **cp == '<' && *(*cp + 1) == '<') + { + return K_UNDEFINED; + } + + /* Check for operators such as "def []=(key, val)". */ + if (kind == K_METHOD || kind == K_SINGLETON) + { + if (parseRubyOperator (name, cp)) + { + return kind; + } + } + + /* Copy the identifier into 'name'. 
*/ + while (**cp != 0 && (isalnum (**cp) || charIsIn (**cp, also_ok))) + { + char last_char = **cp; + + vStringPut (name, last_char); + ++*cp; + + if (kind == K_METHOD) + { + /* Recognize singleton methods. */ + if (last_char == '.') + { + vStringTerminate (name); + vStringClear (name); + return parseIdentifier (cp, name, K_SINGLETON); + } + + /* Recognize characters which mark the end of a method name. */ + if (charIsIn (last_char, "?!=")) + { + break; + } + } + } + return kind; +} + +static void readAndEmitTag (const unsigned char** cp, rubyKind expected_kind) +{ + if (isspace (**cp)) + { + vString *name = vStringNew (); + rubyKind actual_kind = parseIdentifier (cp, name, expected_kind); + + if (actual_kind == K_UNDEFINED || vStringLength (name) == 0) + { + /* + * What kind of tags should we create for code like this? + * + * %w(self.clfloor clfloor).each do |name| + * module_eval <<-"end;" + * def #{name}(x, y=1) + * q, r = x.divmod(y) + * q = q.to_i + * return q, r + * end + * end; + * end + * + * Or this? + * + * class << HTTP + * + * For now, we don't create any. + */ + } + else + { + emitRubyTag (name, actual_kind); + } + vStringDelete (name); + } +} + +static void enterUnnamedScope (void) +{ + stringListAdd (nesting, vStringNewInit ("")); +} + +static void findRubyTags (void) +{ + const unsigned char *line; + boolean inMultiLineComment = FALSE; + + nesting = stringListNew (); + + /* FIXME: this whole scheme is wrong, because Ruby isn't line-based. + * You could perfectly well write: + * + * def + * method + * puts("hello") + * end + * + * if you wished, and this function would fail to recognize anything. 
+ */ + while ((line = fileReadLine ()) != NULL) + { + const unsigned char *cp = line; + + if (canMatch (&cp, "=begin")) + { + inMultiLineComment = TRUE; + continue; + } + if (canMatch (&cp, "=end")) + { + inMultiLineComment = FALSE; + continue; + } + + skipWhitespace (&cp); + + /* Avoid mistakenly starting a scope for modifiers such as + * + * return if + * + * FIXME: this is fooled by code such as + * + * result = if + * + * else + * + * end + * + * FIXME: we're also fooled if someone does something heinous such as + * + * puts("hello") \ + * unless + */ + if (canMatch (&cp, "case") || canMatch (&cp, "for") || + canMatch (&cp, "if") || canMatch (&cp, "unless") || + canMatch (&cp, "while")) + { + enterUnnamedScope (); + } + + /* + * "module M", "class C" and "def m" should only be at the beginning + * of a line. + */ + if (canMatch (&cp, "module")) + { + readAndEmitTag (&cp, K_MODULE); + } + else if (canMatch (&cp, "class")) + { + readAndEmitTag (&cp, K_CLASS); + } + else if (canMatch (&cp, "def")) + { + readAndEmitTag (&cp, K_METHOD); + } + + while (*cp != '\0') + { + /* FIXME: we don't cope with here documents, + * or regular expression literals, or ... you get the idea. + * Hopefully, the restriction above that insists on seeing + * definitions at the starts of lines should keep us out of + * mischief. + */ + if (inMultiLineComment || isspace (*cp)) + { + ++cp; + } + else if (*cp == '#') + { + /* FIXME: this is wrong, but there *probably* won't be a + * definition after an interpolated string (where # doesn't + * mean 'comment'). + */ + break; + } + else if (canMatch (&cp, "begin") || canMatch (&cp, "do")) + { + enterUnnamedScope (); + } + else if (canMatch (&cp, "end") && stringListCount (nesting) > 0) + { + /* Leave the most recent scope. */ + vStringDelete (stringListLast (nesting)); + stringListRemoveLast (nesting); + } + else if (*cp == '"') + { + /* Skip string literals. + * FIXME: should cope with escapes and interpolation. 
+ */ + do { + ++cp; + } while (*cp != 0 && *cp != '"'); + } + else if (*cp != '\0') + { + do + ++cp; + while (isalnum (*cp) || *cp == '_'); + } + } + } + stringListDelete (nesting); +} + +extern parserDefinition* RubyParser (void) +{ + static const char *const extensions [] = { "rb", "ruby", NULL }; + parserDefinition* def = parserNew ("Ruby"); + def->kinds = RubyKinds; + def->kindCount = KIND_COUNT (RubyKinds); + def->extensions = extensions; + def->parser = findRubyTags; + return def; +} + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/third_party/ctags/scheme.c b/third_party/ctags/scheme.c new file mode 100644 index 000000000..9da44cfc1 --- /dev/null +++ b/third_party/ctags/scheme.c @@ -0,0 +1,113 @@ +// clang-format off +/* +* $Id: scheme.c 443 2006-05-30 04:37:13Z darren $ +* +* Copyright (c) 2000-2002, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains functions for generating tags for Scheme language +* files. +*/ + +/* +* INCLUDE FILES +*/ +#include "third_party/ctags/general.h" /* must always come first */ + +#include "libc/mem/alg.h" +#include "libc/str/str.h" + +#include "third_party/ctags/parse.h" +#include "third_party/ctags/read.h" +#include "third_party/ctags/vstring.h" + +/* +* DATA DEFINITIONS +*/ +typedef enum { + K_FUNCTION, K_SET +} schemeKind; + +static kindOption SchemeKinds [] = { + { TRUE, 'f', "function", "functions" }, + { TRUE, 's', "set", "sets" } +}; + +/* +* FUNCTION DEFINITIONS +*/ + +/* Algorithm adapted from from GNU etags. + * Scheme tag functions + * look for (def... xyzzy + * look for (def... (xyzzy + * look for (def ... ((... (xyzzy .... + * look for (set! 
xyzzy + */ +static void readIdentifier (vString *const name, const unsigned char *cp) +{ + const unsigned char *p; + vStringClear (name); + /* Go till you get to white space or a syntactic break */ + for (p = cp; *p != '\0' && *p != '(' && *p != ')' && !isspace (*p); p++) + vStringPut (name, (int) *p); + vStringTerminate (name); +} + +static void findSchemeTags (void) +{ + vString *name = vStringNew (); + const unsigned char *line; + + while ((line = fileReadLine ()) != NULL) + { + const unsigned char *cp = line; + + if (cp [0] == '(' && + (cp [1] == 'D' || cp [1] == 'd') && + (cp [2] == 'E' || cp [2] == 'e') && + (cp [3] == 'F' || cp [3] == 'f')) + { + while (!isspace (*cp)) + cp++; + /* Skip over open parens and white space */ + while (*cp != '\0' && (isspace (*cp) || *cp == '(')) + cp++; + readIdentifier (name, cp); + makeSimpleTag (name, SchemeKinds, K_FUNCTION); + } + if (cp [0] == '(' && + (cp [1] == 'S' || cp [1] == 's') && + (cp [2] == 'E' || cp [2] == 'e') && + (cp [3] == 'T' || cp [3] == 't') && + (cp [4] == '!' 
|| cp [4] == '!') && + (isspace (cp [5]))) + { + while (*cp != '\0' && !isspace (*cp)) + cp++; + /* Skip over white space */ + while (isspace (*cp)) + cp++; + readIdentifier (name, cp); + makeSimpleTag (name, SchemeKinds, K_SET); + } + } + vStringDelete (name); +} + +extern parserDefinition* SchemeParser (void) +{ + static const char *const extensions [] = { + "SCM", "SM", "sch", "scheme", "scm", "sm", NULL + }; + parserDefinition* def = parserNew ("Scheme"); + def->kinds = SchemeKinds; + def->kindCount = KIND_COUNT (SchemeKinds); + def->extensions = extensions; + def->parser = findSchemeTags; + return def; +} + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/third_party/ctags/sh.c b/third_party/ctags/sh.c new file mode 100644 index 000000000..d18377dba --- /dev/null +++ b/third_party/ctags/sh.c @@ -0,0 +1,117 @@ +// clang-format off +/* +* $Id: sh.c 443 2006-05-30 04:37:13Z darren $ +* +* Copyright (c) 2000-2002, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains functions for generating tags for scripts for the +* Bourne shell (and its derivatives, the Korn and Z shells). +*/ + +/* +* INCLUDE FILES +*/ +#include "third_party/ctags/general.h" /* must always come first */ + +#include "libc/mem/alg.h" +#include "libc/str/str.h" + +#include "third_party/ctags/parse.h" +#include "third_party/ctags/read.h" +#include "third_party/ctags/routines.h" +#include "third_party/ctags/vstring.h" + +/* +* DATA DEFINITIONS +*/ +typedef enum { + K_FUNCTION +} shKind; + +static kindOption ShKinds [] = { + { TRUE, 'f', "function", "functions"} +}; + +/* +* FUNCTION DEFINITIONS +*/ + +/* Reject any tag "main" from a file named "configure". These appear in + * here-documents in GNU autoconf scripts and will add a haystack to the + * needle. 
+ */ +static boolean hackReject (const vString* const tagName) +{ + const char *const scriptName = baseFilename (vStringValue (File.name)); + boolean result = (boolean) ( + strcmp (scriptName, "configure") == 0 && + strcmp (vStringValue (tagName), "main") == 0); + return result; +} + +static void findShTags (void) +{ + vString *name = vStringNew (); + const unsigned char *line; + + while ((line = fileReadLine ()) != NULL) + { + const unsigned char* cp = line; + boolean functionFound = FALSE; + + if (line [0] == '#') + continue; + + while (isspace (*cp)) + cp++; + if (strncmp ((const char*) cp, "function", (size_t) 8) == 0 && + isspace ((int) cp [8])) + { + functionFound = TRUE; + cp += 8; + if (! isspace ((int) *cp)) + continue; + while (isspace ((int) *cp)) + ++cp; + } + if (! (isalnum ((int) *cp) || *cp == '_')) + continue; + while (isalnum ((int) *cp) || *cp == '_') + { + vStringPut (name, (int) *cp); + ++cp; + } + vStringTerminate (name); + while (isspace ((int) *cp)) + ++cp; + if (*cp++ == '(') + { + while (isspace ((int) *cp)) + ++cp; + if (*cp == ')' && ! hackReject (name)) + functionFound = TRUE; + } + if (functionFound) + makeSimpleTag (name, ShKinds, K_FUNCTION); + vStringClear (name); + } + vStringDelete (name); +} + +extern parserDefinition* ShParser (void) +{ + static const char *const extensions [] = { + "sh", "SH", "bsh", "bash", "ksh", "zsh", NULL + }; + parserDefinition* def = parserNew ("Sh"); + def->kinds = ShKinds; + def->kindCount = KIND_COUNT (ShKinds); + def->extensions = extensions; + def->parser = findShTags; + return def; +} + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/third_party/ctags/slang.c b/third_party/ctags/slang.c new file mode 100644 index 000000000..df5d4a3fe --- /dev/null +++ b/third_party/ctags/slang.c @@ -0,0 +1,42 @@ +// clang-format off +/* + * $Id: slang.c 443 2006-05-30 04:37:13Z darren $ + * + * Copyright (c) 2000-2001, Francesc Rocher + * + * Author: Francesc Rocher . 
+ * + * This source code is released for free distribution under the terms of the + * GNU General Public License. + * + * This module contains functions for generating tags for S-Lang files. + */ + +/* + * INCLUDE FILES + */ +#include "third_party/ctags/general.h" /* must always come first */ +#include "third_party/ctags/parse.h" + +/* + * FUNCTION DEFINITIONS + */ +static void installSlangRegex (const langType language) +{ + addTagRegex (language, + "^.*define[ \t]+([A-Z_][A-Z0-9_]*)[^;]*$", + "\\1", "f,function,functions", "i"); + addTagRegex (language, + "^[ \t]*implements[ \t]+\\([ \t]*\"([^\"]*)\"[ \t]*\\)[ \t]*;", + "\\1", "n,namespace,namespaces", NULL); +} + +extern parserDefinition* SlangParser (void) +{ + static const char *const extensions [] = { "sl", NULL }; + parserDefinition* const def = parserNew ("SLang"); + def->extensions = extensions; + def->initialize = installSlangRegex; + def->regex = TRUE; + return def; +} diff --git a/third_party/ctags/sml.c b/third_party/ctags/sml.c new file mode 100644 index 000000000..965ec7c0b --- /dev/null +++ b/third_party/ctags/sml.c @@ -0,0 +1,214 @@ +// clang-format off +/* +* $Id: sml.c 536 2007-06-02 06:09:00Z elliotth $ +* +* Copyright (c) 2002, Venkatesh Prasad Ranganath and Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains functions for generating tags for SML language files. 
+*/ + +/* + * INCLUDE FILES + */ +#include "third_party/ctags/general.h" /* must always come first */ + +#include "libc/mem/alg.h" +#include "libc/str/str.h" + +#include "third_party/ctags/entry.h" +#include "third_party/ctags/parse.h" +#include "third_party/ctags/read.h" +#include "third_party/ctags/vstring.h" + +/* + * DATA DECLARATIONS + */ +typedef enum { + K_AND = -2, + K_NONE = -1, + K_EXCEPTION, + K_FUNCTION, + K_FUNCTOR, + K_SIGNATURE, + K_STRUCTURE, + K_TYPE, + K_VAL +} smlKind; + +/* + * DATA DEFINITIONS + */ +static kindOption SmlKinds[] = { + { TRUE, 'e', "exception", "exception declarations" }, + { TRUE, 'f', "function", "function definitions" }, + { TRUE, 'c', "functor", "functor definitions" }, + { TRUE, 's', "signature", "signature declarations" }, + { TRUE, 'r', "structure", "structure declarations" }, + { TRUE, 't', "type", "type definitions" }, + { TRUE, 'v', "value", "value bindings" } +}; + +static struct { + const char *keyword; + smlKind kind; +} SmlKeywordTypes [] = { + { "abstype", K_TYPE }, + { "and", K_AND }, + { "datatype", K_TYPE }, + { "exception", K_EXCEPTION }, + { "functor", K_FUNCTOR }, + { "fun", K_FUNCTION }, + { "signature", K_SIGNATURE }, + { "structure", K_STRUCTURE }, + { "type", K_TYPE }, + { "val", K_VAL } +}; + +static unsigned int CommentLevel = 0; + +/* + * FUNCTION DEFINITIONS + */ + +static void makeSmlTag (smlKind type, vString *name) +{ + tagEntryInfo tag; + initTagEntry (&tag, vStringValue (name)); + tag.kindName = SmlKinds [type].name; + tag.kind = SmlKinds [type].letter; + makeTagEntry (&tag); +} + +static const unsigned char *skipSpace (const unsigned char *cp) +{ + while (isspace ((int) *cp)) + ++cp; + return cp; +} + +static boolean isIdentifier (int c) +{ + boolean result = FALSE; + /* Consider '_' as an delimiter to aid user in tracking it's usage. 
*/ + const char *const alternateIdentifiers = "!%&$#+-<>=/?@\\~'^|*_"; + if (isalnum (c)) + result = TRUE; + else if (c != '\0' && strchr (alternateIdentifiers, c) != NULL) + result = TRUE; + return result; +} + +static const unsigned char *parseIdentifier ( + const unsigned char *cp, vString *const identifier) +{ + boolean stringLit = FALSE; + vStringClear (identifier); + while (*cp != '\0' && (!isIdentifier ((int) *cp) || stringLit)) + { + int oneback = *cp; + cp++; + if (oneback == '(' && *cp == '*' && stringLit == FALSE) + { + CommentLevel++; + return ++cp; + } + if (*cp == '"' && oneback != '\\') + { + stringLit = TRUE; + continue; + } + if (stringLit && *cp == '"' && oneback != '\\') + stringLit = FALSE; + } + if (strcmp ((const char *) cp, "") == 0 || cp == NULL) + return cp; + + while (isIdentifier ((int) *cp)) + { + vStringPut (identifier, (int) *cp); + cp++; + } + vStringTerminate (identifier); + return cp; +} + +static smlKind findNextIdentifier (const unsigned char **cp) +{ + smlKind result = K_NONE; + vString *const identifier = vStringNew (); + unsigned int count = sizeof (SmlKeywordTypes) / sizeof (SmlKeywordTypes [0]); + unsigned int i; + *cp = parseIdentifier (*cp, identifier); + for (i = 0 ; i < count && result == K_NONE ; ++i) + { + const char *id = vStringValue (identifier); + if (strcmp (id, SmlKeywordTypes [i].keyword) == 0) + result = SmlKeywordTypes [i].kind; + } + vStringDelete (identifier); + return result; +} + +static void findSmlTags (void) +{ + vString *const identifier = vStringNew (); + const unsigned char *line; + smlKind lastTag = K_NONE; + + while ((line = fileReadLine ()) != NULL) + { + const unsigned char *cp = skipSpace (line); + do + { + smlKind foundTag; + if (CommentLevel != 0) + { + cp = (const unsigned char *) strstr ((const char *) cp, "*)"); + if (cp == NULL) + continue; + else + { + --CommentLevel; + cp += 2; + } + } + foundTag = findNextIdentifier (&cp); + if (foundTag != K_NONE) + { + cp = skipSpace (cp); + cp = 
parseIdentifier (cp, identifier); + if (foundTag == K_AND) + makeSmlTag (lastTag, identifier); + else + { + makeSmlTag (foundTag, identifier); + lastTag = foundTag; + } + } + if (strstr ((const char *) cp, "(*") != NULL) + { + cp += 2; + cp = (const unsigned char *) strstr ((const char *) cp, "*)"); + if (cp == NULL) + ++CommentLevel; + } + } while (cp != NULL && strcmp ((const char *) cp, "") != 0); + } + vStringDelete (identifier); +} + +extern parserDefinition *SmlParser (void) +{ + static const char *const extensions[] = { "sml", "sig", NULL }; + parserDefinition *def = parserNew ("SML"); + def->kinds = SmlKinds; + def->kindCount = KIND_COUNT (SmlKinds); + def->extensions = extensions; + def->parser = findSmlTags; + return def; +} + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/third_party/ctags/sort.c b/third_party/ctags/sort.c new file mode 100644 index 000000000..9019f8b82 --- /dev/null +++ b/third_party/ctags/sort.c @@ -0,0 +1,255 @@ +// clang-format off +/* +* $Id: sort.c 747 2009-11-06 02:33:37Z dhiebert $ +* +* Copyright (c) 1996-2002, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains functions to sort the tag entries. 
+*/ + +/* +* INCLUDE FILES +*/ +#include "libc/mem/mem.h" +#include "third_party/ctags/general.h" /* must always come first */ + +#if defined (HAVE_STDLIB_H) +#include "libc/calls/calls.h" +#include "libc/calls/dprintf.h" +#include "libc/calls/termios.h" +#include "libc/fmt/conv.h" +#include "libc/limits.h" +#include "libc/mem/alg.h" +#include "libc/mem/mem.h" +#include "libc/runtime/runtime.h" +#include "libc/stdio/rand.h" +#include "libc/stdio/temp.h" +#include "libc/str/str.h" +#include "libc/sysv/consts/exit.h" +#include "third_party/gdtoa/gdtoa.h" +#include "third_party/getopt/getopt.h" +#include "third_party/musl/crypt.h" +#include "third_party/musl/rand48.h" /* to declare malloc () */ +#endif +#include "libc/mem/alg.h" +#include "libc/str/str.h" +#include "libc/calls/calls.h" +#include "libc/calls/dprintf.h" +#include "libc/calls/weirdtypes.h" +#include "libc/fmt/fmt.h" +#include "libc/mem/fmt.h" +#include "libc/stdio/stdio.h" +#include "libc/stdio/temp.h" +#include "third_party/musl/tempnam.h" + +#include "third_party/ctags/debug.h" +#include "third_party/ctags/entry.h" +#include "third_party/ctags/options.h" +#include "third_party/ctags/read.h" +#include "third_party/ctags/routines.h" +#include "third_party/ctags/sort.h" + +/* +* FUNCTION DEFINITIONS +*/ + +extern void catFile (const char *const name) +{ + FILE *const fp = fopen (name, "r"); + + if (fp != NULL) + { + int c; + while ((c = getc (fp)) != EOF) + putchar (c); + fflush (stdout); + fclose (fp); + } +} + +#ifdef EXTERNAL_SORT + +#ifdef NON_CONST_PUTENV_PROTOTYPE +# define PE_CONST +#else +# define PE_CONST const +#endif + +extern void externalSortTags (const boolean toStdout) +{ + const char *const sortNormalCommand = "sort -u -o"; + const char *const sortFoldedCommand = "sort -u -f -o"; + const char *sortCommand = + Option.sorted == SO_FOLDSORTED ? 
sortFoldedCommand : sortNormalCommand; + PE_CONST char *const sortOrder1 = "LC_COLLATE=C"; + PE_CONST char *const sortOrder2 = "LC_ALL=C"; + const size_t length = 4 + strlen (sortOrder1) + strlen (sortOrder2) + + strlen (sortCommand) + (2 * strlen (tagFileName ())); + char *const cmd = (char *) malloc (length + 1); + int ret = -1; + + if (cmd != NULL) + { + /* Ensure ASCII value sort order. + */ +#ifdef HAVE_SETENV + setenv ("LC_COLLATE", "C", 1); + setenv ("LC_ALL", "C", 1); + sprintf (cmd, "%s %s %s", sortCommand, tagFileName (), tagFileName ()); +#else +# ifdef HAVE_PUTENV + putenv (sortOrder1); + putenv (sortOrder2); + sprintf (cmd, "%s %s %s", sortCommand, tagFileName (), tagFileName ()); +# else + sprintf (cmd, "%s %s %s %s %s", sortOrder1, sortOrder2, sortCommand, + tagFileName (), tagFileName ()); +# endif +#endif + verbose ("system (\"%s\")\n", cmd); + ret = system (cmd); + free (cmd); + + } + if (ret != 0) + error (FATAL | PERROR, "cannot sort tag file"); + else if (toStdout) + catFile (tagFileName ()); +} + +#else + +/* + * These functions provide a basic internal sort. No great memory + * optimization is performed (e.g. recursive subdivided sorts), + * so have lots of memory if you have large tag files. 
+ */ + +static void failedSort (FILE *const fp, const char* msg) +{ + const char* const cannotSort = "cannot sort tag file"; + if (fp != NULL) + fclose (fp); + if (msg == NULL) + error (FATAL | PERROR, "%s", cannotSort); + else + error (FATAL, "%s: %s", msg, cannotSort); +} + +static int compareTagsFolded(const void *const one, const void *const two) +{ + const char *const line1 = *(const char* const*) one; + const char *const line2 = *(const char* const*) two; + + return struppercmp (line1, line2); +} + +static int compareTags (const void *const one, const void *const two) +{ + const char *const line1 = *(const char* const*) one; + const char *const line2 = *(const char* const*) two; + + return strcmp (line1, line2); +} + +static void writeSortedTags ( + char **const table, const size_t numTags, const boolean toStdout) +{ + FILE *fp; + size_t i; + + /* Write the sorted lines back into the tag file. + */ + if (toStdout) + fp = stdout; + else + { + fp = fopen (tagFileName (), "w"); + if (fp == NULL) + failedSort (fp, NULL); + } + for (i = 0 ; i < numTags ; ++i) + { + /* Here we filter out identical tag *lines* (including search + * pattern) if this is not an xref file. + */ + if (i == 0 || Option.xref || strcmp (table [i], table [i-1]) != 0) + if (fputs (table [i], fp) == EOF) + failedSort (fp, NULL); + } + if (toStdout) + fflush (fp); + else + fclose (fp); +} + +extern void internalSortTags (const boolean toStdout) +{ + vString *vLine = vStringNew (); + FILE *fp = NULL; + const char *line; + size_t i; + int (*cmpFunc)(const void *, const void *); + + /* Allocate a table of line pointers to be sorted. + */ + size_t numTags = TagFile.numTags.added + TagFile.numTags.prev; + const size_t tableSize = numTags * sizeof (char *); + char **const table = (char **) malloc (tableSize); /* line pointers */ + DebugStatement ( size_t mallocSize = tableSize; ) /* cumulative total */ + + + cmpFunc = Option.sorted == SO_FOLDSORTED ? 
compareTagsFolded : compareTags; + if (table == NULL) + failedSort (fp, "out of memory"); + + /* Open the tag file and place its lines into allocated buffers. + */ + fp = fopen (tagFileName (), "r"); + if (fp == NULL) + failedSort (fp, NULL); + for (i = 0 ; i < numTags && ! feof (fp) ; ) + { + line = readLine (vLine, fp); + if (line == NULL) + { + if (! feof (fp)) + failedSort (fp, NULL); + break; + } + else if (*line == '\0' || strcmp (line, "\n") == 0) + ; /* ignore blank lines */ + else + { + const size_t stringSize = strlen (line) + 1; + + table [i] = (char *) malloc (stringSize); + if (table [i] == NULL) + failedSort (fp, "out of memory"); + DebugStatement ( mallocSize += stringSize; ) + strcpy (table [i], line); + ++i; + } + } + numTags = i; + fclose (fp); + vStringDelete (vLine); + + /* Sort the lines. + */ + qsort (table, numTags, sizeof (*table), cmpFunc); + + writeSortedTags (table, numTags, toStdout); + + PrintStatus (("sort memory: %ld bytes\n", (long) mallocSize)); + for (i = 0 ; i < numTags ; ++i) + free (table [i]); + free (table); +} + +#endif + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/third_party/ctags/sort.h b/third_party/ctags/sort.h new file mode 100644 index 000000000..1c9575714 --- /dev/null +++ b/third_party/ctags/sort.h @@ -0,0 +1,33 @@ +// clang-format off +/* +* $Id: sort.h 443 2006-05-30 04:37:13Z darren $ +* +* Copyright (c) 1998-2002, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. 
+* +* External interface to sort.c +*/ +#ifndef _SORT_H +#define _SORT_H + +/* +* INCLUDE FILES +*/ +#include "third_party/ctags/general.h" /* must always come first */ + +/* +* FUNCTION PROTOTYPES +*/ +extern void catFile (const char *const name); + +#ifdef EXTERNAL_SORT +extern void externalSortTags (const boolean toStdout); +#else +extern void internalSortTags (const boolean toStdout); +#endif + +#endif /* _SORT_H */ + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/third_party/ctags/sql.c b/third_party/ctags/sql.c new file mode 100644 index 000000000..3321f109e --- /dev/null +++ b/third_party/ctags/sql.c @@ -0,0 +1,2379 @@ +// clang-format off +/* + * $Id: sql.c 761 2010-06-04 12:40:28Z dfishburn $ + * + * Copyright (c) 2002-2003, Darren Hiebert + * + * This source code is released for free distribution under the terms of the + * GNU General Public License. + * + * This module contains functions for generating tags for PL/SQL language + * files. + */ + +/* + * INCLUDE FILES + */ +#include "third_party/ctags/general.h" /* must always come first */ + +#include "libc/str/str.h" /* to define isalpha () */ +#include "libc/runtime/runtime.h" +#ifdef DEBUG +#include "libc/calls/calls.h" +#include "libc/calls/dprintf.h" +#include "libc/calls/weirdtypes.h" +#include "libc/fmt/fmt.h" +#include "libc/mem/fmt.h" +#include "libc/stdio/stdio.h" +#include "libc/stdio/temp.h" +#include "third_party/musl/tempnam.h" +#endif + +#include "third_party/ctags/debug.h" +#include "third_party/ctags/entry.h" +#include "third_party/ctags/keyword.h" +#include "third_party/ctags/parse.h" +#include "third_party/ctags/read.h" +#include "third_party/ctags/routines.h" +#include "third_party/ctags/vstring.h" + +/* + * On-line "Oracle Database PL/SQL Language Reference": + * http://download.oracle.com/docs/cd/B28359_01/appdev.111/b28370/toc.htm + * + * Sample PL/SQL code is available from: + * http://www.orafaq.com/faqscrpt.htm#GENPLSQL + * + * On-line SQL Anywhere Documentation + * 
http://www.ianywhere.com/developer/product_manuals/sqlanywhere/index.html + */ + +/* + * MACROS + */ +#define isType(token,t) (boolean) ((token)->type == (t)) +#define isKeyword(token,k) (boolean) ((token)->keyword == (k)) + +/* + * DATA DECLARATIONS + */ + +typedef enum eException { ExceptionNone, ExceptionEOF } exception_t; + +/* + * Used to specify type of keyword. + */ +typedef enum eKeywordId { + KEYWORD_NONE = -1, + KEYWORD_is, + KEYWORD_begin, + KEYWORD_body, + KEYWORD_cursor, + KEYWORD_declare, + KEYWORD_end, + KEYWORD_function, + KEYWORD_if, + KEYWORD_else, + KEYWORD_elseif, + KEYWORD_endif, + KEYWORD_loop, + KEYWORD_while, + KEYWORD_case, + KEYWORD_for, + KEYWORD_do, + KEYWORD_call, + KEYWORD_package, + KEYWORD_pragma, + KEYWORD_procedure, + KEYWORD_record, + KEYWORD_object, + KEYWORD_ref, + KEYWORD_rem, + KEYWORD_return, + KEYWORD_returns, + KEYWORD_subtype, + KEYWORD_table, + KEYWORD_trigger, + KEYWORD_type, + KEYWORD_index, + KEYWORD_event, + KEYWORD_publication, + KEYWORD_service, + KEYWORD_domain, + KEYWORD_datatype, + KEYWORD_result, + KEYWORD_url, + KEYWORD_internal, + KEYWORD_external, + KEYWORD_when, + KEYWORD_then, + KEYWORD_variable, + KEYWORD_exception, + KEYWORD_at, + KEYWORD_on, + KEYWORD_primary, + KEYWORD_references, + KEYWORD_unique, + KEYWORD_check, + KEYWORD_constraint, + KEYWORD_foreign, + KEYWORD_ml_table, + KEYWORD_ml_table_lang, + KEYWORD_ml_table_dnet, + KEYWORD_ml_table_java, + KEYWORD_ml_table_chk, + KEYWORD_ml_conn, + KEYWORD_ml_conn_lang, + KEYWORD_ml_conn_dnet, + KEYWORD_ml_conn_java, + KEYWORD_ml_conn_chk, + KEYWORD_ml_prop, + KEYWORD_local, + KEYWORD_temporary, + KEYWORD_drop, + KEYWORD_view, + KEYWORD_synonym, + KEYWORD_handler, + KEYWORD_comment, + KEYWORD_create, + KEYWORD_go +} keywordId; + +/* + * Used to determine whether keyword is valid for the token language and + * what its ID is. 
+ */ +typedef struct sKeywordDesc { + const char *name; + keywordId id; +} keywordDesc; + +typedef enum eTokenType { + TOKEN_UNDEFINED, + TOKEN_BLOCK_LABEL_BEGIN, + TOKEN_BLOCK_LABEL_END, + TOKEN_CHARACTER, + TOKEN_CLOSE_PAREN, + TOKEN_COLON, + TOKEN_SEMICOLON, + TOKEN_COMMA, + TOKEN_IDENTIFIER, + TOKEN_KEYWORD, + TOKEN_OPEN_PAREN, + TOKEN_OPERATOR, + TOKEN_OTHER, + TOKEN_STRING, + TOKEN_PERIOD, + TOKEN_OPEN_CURLY, + TOKEN_CLOSE_CURLY, + TOKEN_OPEN_SQUARE, + TOKEN_CLOSE_SQUARE, + TOKEN_TILDE, + TOKEN_FORWARD_SLASH, + TOKEN_EQUAL +} tokenType; + +typedef struct sTokenInfoSQL { + tokenType type; + keywordId keyword; + vString * string; + vString * scope; + int begin_end_nest_lvl; + unsigned long lineNumber; + fpos_t filePosition; +} tokenInfo; + +/* + * DATA DEFINITIONS + */ + +static langType Lang_sql; + +static jmp_buf Exception; + +typedef enum { + SQLTAG_CURSOR, + SQLTAG_PROTOTYPE, + SQLTAG_FUNCTION, + SQLTAG_FIELD, + SQLTAG_LOCAL_VARIABLE, + SQLTAG_BLOCK_LABEL, + SQLTAG_PACKAGE, + SQLTAG_PROCEDURE, + SQLTAG_RECORD, + SQLTAG_SUBTYPE, + SQLTAG_TABLE, + SQLTAG_TRIGGER, + SQLTAG_VARIABLE, + SQLTAG_INDEX, + SQLTAG_EVENT, + SQLTAG_PUBLICATION, + SQLTAG_SERVICE, + SQLTAG_DOMAIN, + SQLTAG_VIEW, + SQLTAG_SYNONYM, + SQLTAG_MLTABLE, + SQLTAG_MLCONN, + SQLTAG_MLPROP, + SQLTAG_COUNT +} sqlKind; + +static kindOption SqlKinds [] = { + { TRUE, 'c', "cursor", "cursors" }, + { FALSE, 'd', "prototype", "prototypes" }, + { TRUE, 'f', "function", "functions" }, + { TRUE, 'F', "field", "record fields" }, + { FALSE, 'l', "local", "local variables" }, + { TRUE, 'L', "label", "block label" }, + { TRUE, 'P', "package", "packages" }, + { TRUE, 'p', "procedure", "procedures" }, + { FALSE, 'r', "record", "records" }, + { TRUE, 's', "subtype", "subtypes" }, + { TRUE, 't', "table", "tables" }, + { TRUE, 'T', "trigger", "triggers" }, + { TRUE, 'v', "variable", "variables" }, + { TRUE, 'i', "index", "indexes" }, + { TRUE, 'e', "event", "events" }, + { TRUE, 'U', "publication", "publications" }, 
+ { TRUE, 'R', "service", "services" }, + { TRUE, 'D', "domain", "domains" }, + { TRUE, 'V', "view", "views" }, + { TRUE, 'n', "synonym", "synonyms" }, + { TRUE, 'x', "mltable", "MobiLink Table Scripts" }, + { TRUE, 'y', "mlconn", "MobiLink Conn Scripts" }, + { TRUE, 'z', "mlprop", "MobiLink Properties " } +}; + +static const keywordDesc SqlKeywordTable [] = { + /* keyword keyword ID */ + { "as", KEYWORD_is }, + { "is", KEYWORD_is }, + { "begin", KEYWORD_begin }, + { "body", KEYWORD_body }, + { "cursor", KEYWORD_cursor }, + { "declare", KEYWORD_declare }, + { "end", KEYWORD_end }, + { "function", KEYWORD_function }, + { "if", KEYWORD_if }, + { "else", KEYWORD_else }, + { "elseif", KEYWORD_elseif }, + { "endif", KEYWORD_endif }, + { "loop", KEYWORD_loop }, + { "while", KEYWORD_while }, + { "case", KEYWORD_case }, + { "for", KEYWORD_for }, + { "do", KEYWORD_do }, + { "call", KEYWORD_call }, + { "package", KEYWORD_package }, + { "pragma", KEYWORD_pragma }, + { "procedure", KEYWORD_procedure }, + { "record", KEYWORD_record }, + { "object", KEYWORD_object }, + { "ref", KEYWORD_ref }, + { "rem", KEYWORD_rem }, + { "return", KEYWORD_return }, + { "returns", KEYWORD_returns }, + { "subtype", KEYWORD_subtype }, + { "table", KEYWORD_table }, + { "trigger", KEYWORD_trigger }, + { "type", KEYWORD_type }, + { "index", KEYWORD_index }, + { "event", KEYWORD_event }, + { "publication", KEYWORD_publication }, + { "service", KEYWORD_service }, + { "domain", KEYWORD_domain }, + { "datatype", KEYWORD_datatype }, + { "result", KEYWORD_result }, + { "url", KEYWORD_url }, + { "internal", KEYWORD_internal }, + { "external", KEYWORD_external }, + { "when", KEYWORD_when }, + { "then", KEYWORD_then }, + { "variable", KEYWORD_variable }, + { "exception", KEYWORD_exception }, + { "at", KEYWORD_at }, + { "on", KEYWORD_on }, + { "primary", KEYWORD_primary }, + { "references", KEYWORD_references }, + { "unique", KEYWORD_unique }, + { "check", KEYWORD_check }, + { "constraint", KEYWORD_constraint 
}, + { "foreign", KEYWORD_foreign }, + { "ml_add_table_script", KEYWORD_ml_table }, + { "ml_add_lang_table_script", KEYWORD_ml_table_lang }, + { "ml_add_dnet_table_script", KEYWORD_ml_table_dnet }, + { "ml_add_java_table_script", KEYWORD_ml_table_java }, + { "ml_add_lang_table_script_chk", KEYWORD_ml_table_chk }, + { "ml_add_connection_script", KEYWORD_ml_conn }, + { "ml_add_lang_connection_script", KEYWORD_ml_conn_lang }, + { "ml_add_dnet_connection_script", KEYWORD_ml_conn_dnet }, + { "ml_add_java_connection_script", KEYWORD_ml_conn_java }, + { "ml_add_lang_conn_script_chk", KEYWORD_ml_conn_chk }, + { "ml_add_property", KEYWORD_ml_prop }, + { "local", KEYWORD_local }, + { "temporary", KEYWORD_temporary }, + { "drop", KEYWORD_drop }, + { "view", KEYWORD_view }, + { "synonym", KEYWORD_synonym }, + { "handler", KEYWORD_handler }, + { "comment", KEYWORD_comment }, + { "create", KEYWORD_create }, + { "go", KEYWORD_go } +}; + +/* + * FUNCTION DECLARATIONS + */ + +/* Recursive calls */ +static void parseBlock (tokenInfo *const token, const boolean local); +static void parseDeclare (tokenInfo *const token, const boolean local); +static void parseKeywords (tokenInfo *const token); +static void parseSqlFile (tokenInfo *const token); + +/* + * FUNCTION DEFINITIONS + */ + +static boolean isIdentChar1 (const int c) +{ + /* + * Other databases are less restrictive on the first character of + * an identifier. + * isIdentChar1 is used to identify the first character of an + * identifier, so we are removing some restrictions. 
+ */ + return (boolean) + (isalpha (c) || c == '@' || c == '_' ); +} + +static boolean isIdentChar (const int c) +{ + return (boolean) + (isalpha (c) || isdigit (c) || c == '$' || + c == '@' || c == '_' || c == '#'); +} + +static boolean isCmdTerm (tokenInfo *const token) +{ + DebugStatement ( + debugPrintf (DEBUG_PARSE + , "\n isCmdTerm: token same tt:%d tk:%d\n" + , token->type + , token->keyword + ); + ); + + /* + * Based on the various customer sites I have been at + * the most common command delimiters are + * ; + * ~ + * / + * go + * This routine will check for any of these, more + * can easily be added by modifying readToken and + * adding the new delimiter to one of: + * enum eTokenType (single-character delimiters) + * enum eKeywordId (keyword delimiters such as go) + */ + return ( isType (token, TOKEN_SEMICOLON) || + isType (token, TOKEN_TILDE) || + isType (token, TOKEN_FORWARD_SLASH) || + isKeyword (token, KEYWORD_go) + ); +} + +static boolean isMatchedEnd(tokenInfo *const token, int nest_lvl) +{ + boolean terminated = FALSE; + /* + * Since different forms of SQL allow the use of + * BEGIN + * ... + * END + * blocks, some statements may not be terminated using + * the standard delimiters: + * ; + * ~ + * / + * go + * This routine will check to see if we encounter and END + * for the matching nest level of BEGIN ... END statements. + * If we find one, then we can assume, the statement was terminated + * since we have fallen through to the END statement of the BEGIN + * block. 
+ */ + if ( nest_lvl > 0 && isKeyword (token, KEYWORD_end) ) + { + if ( token->begin_end_nest_lvl == nest_lvl ) + terminated = TRUE; + } + + return terminated; +} + +static void buildSqlKeywordHash (void) +{ + const size_t count = sizeof (SqlKeywordTable) / + sizeof (SqlKeywordTable [0]); + size_t i; + for (i = 0 ; i < count ; ++i) + { + const keywordDesc* const p = &SqlKeywordTable [i]; + addKeyword (p->name, Lang_sql, (int) p->id); + } +} + +static tokenInfo *newToken (void) +{ + tokenInfo *const token = xMalloc (1, tokenInfo); + + token->type = TOKEN_UNDEFINED; + token->keyword = KEYWORD_NONE; + token->string = vStringNew (); + token->scope = vStringNew (); + token->begin_end_nest_lvl = 0; + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + + return token; +} + +static void deleteToken (tokenInfo *const token) +{ + vStringDelete (token->string); + vStringDelete (token->scope); + eFree (token); +} + +/* + * Tag generation functions + */ + +static void makeConstTag (tokenInfo *const token, const sqlKind kind) +{ + if (SqlKinds [kind].enabled) + { + const char *const name = vStringValue (token->string); + tagEntryInfo e; + initTagEntry (&e, name); + + e.lineNumber = token->lineNumber; + e.filePosition = token->filePosition; + e.kindName = SqlKinds [kind].name; + e.kind = SqlKinds [kind].letter; + + makeTagEntry (&e); + } +} + +static void makeSqlTag (tokenInfo *const token, const sqlKind kind) +{ + vString * fulltag; + + if (SqlKinds [kind].enabled) + { + /* + * If a scope has been added to the token, change the token + * string to include the scope when making the tag. 
+ */ + if ( vStringLength(token->scope) > 0 ) + { + fulltag = vStringNew (); + vStringCopy(fulltag, token->scope); + vStringCatS (fulltag, "."); + vStringCatS (fulltag, vStringValue(token->string)); + vStringTerminate(fulltag); + vStringCopy(token->string, fulltag); + vStringDelete (fulltag); + } + makeConstTag (token, kind); + } +} + +/* + * Parsing functions + */ + +static void parseString (vString *const string, const int delimiter) +{ + boolean end = FALSE; + while (! end) + { + int c = fileGetc (); + if (c == EOF) + end = TRUE; + /* + else if (c == '\\') + { + c = fileGetc(); // This maybe a ' or ". // + vStringPut(string, c); + } + */ + else if (c == delimiter) + end = TRUE; + else + vStringPut (string, c); + } + vStringTerminate (string); +} + +/* Reads an SQL identifier beginning with "firstChar" and places it into "string". +*/ +static void parseIdentifier (vString *const string, const int firstChar) +{ + int c = firstChar; + Assert (isIdentChar1 (c)); + do + { + vStringPut (string, c); + c = fileGetc (); + } while (isIdentChar (c)); + vStringTerminate (string); + if (!isspace (c)) + fileUngetc (c); /* unget the terminating character unless it is whitespace */ +} + +static void readToken (tokenInfo *const token) +{ + int c; + + token->type = TOKEN_UNDEFINED; + token->keyword = KEYWORD_NONE; + vStringClear (token->string); + +getNextChar: + do + { + c = fileGetc (); + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + /* + * Added " to the list of ignores, not sure what this + * might break but it gets by this issue: + * create table "t1" (...) + * + * Darren, the code passes all my tests for both + * Oracle and SQL Anywhere, but maybe you can tell me + * what this may effect. 
+ */ + } + while (c == '\t' || c == ' ' || c == '\n'); + + switch (c) + { + case EOF: longjmp (Exception, (int)ExceptionEOF); break; + case '(': token->type = TOKEN_OPEN_PAREN; break; + case ')': token->type = TOKEN_CLOSE_PAREN; break; + case ':': token->type = TOKEN_COLON; break; + case ';': token->type = TOKEN_SEMICOLON; break; + case '.': token->type = TOKEN_PERIOD; break; + case ',': token->type = TOKEN_COMMA; break; + case '{': token->type = TOKEN_OPEN_CURLY; break; + case '}': token->type = TOKEN_CLOSE_CURLY; break; + case '~': token->type = TOKEN_TILDE; break; + case '[': token->type = TOKEN_OPEN_SQUARE; break; + case ']': token->type = TOKEN_CLOSE_SQUARE; break; + case '=': token->type = TOKEN_EQUAL; break; + + case '\'': + case '"': + token->type = TOKEN_STRING; + parseString (token->string, c); + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + break; + + case '-': + c = fileGetc (); + if (c == '-') /* -- is this the start of a comment? */ + { + fileSkipToCharacter ('\n'); + goto getNextChar; + } + else + { + if (!isspace (c)) + fileUngetc (c); + token->type = TOKEN_OPERATOR; + } + break; + + case '<': + case '>': + { + const int initial = c; + int d = fileGetc (); + if (d == initial) + { + if (initial == '<') + token->type = TOKEN_BLOCK_LABEL_BEGIN; + else + token->type = TOKEN_BLOCK_LABEL_END; + } + else + { + fileUngetc (d); + token->type = TOKEN_UNDEFINED; + } + break; + } + + case '\\': + c = fileGetc (); + if (c != '\\' && c != '"' && c != '\'' && !isspace (c)) + fileUngetc (c); + token->type = TOKEN_CHARACTER; + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + break; + + case '/': + { + int d = fileGetc (); + if ( (d != '*') && /* is this the start of a comment? */ + (d != '/') ) /* is a one line comment? 
*/ + { + token->type = TOKEN_FORWARD_SLASH; + fileUngetc (d); + } + else + { + if (d == '*') + { + do + { + fileSkipToCharacter ('*'); + c = fileGetc (); + if (c == '/') + break; + else + fileUngetc (c); + } while (c != EOF && c != '\0'); + goto getNextChar; + } + else if (d == '/') /* is this the start of a comment? */ + { + fileSkipToCharacter ('\n'); + goto getNextChar; + } + } + break; + } + + default: + if (! isIdentChar1 (c)) + token->type = TOKEN_UNDEFINED; + else + { + parseIdentifier (token->string, c); + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + token->keyword = analyzeToken (token->string, Lang_sql); + if (isKeyword (token, KEYWORD_rem)) + { + vStringClear (token->string); + fileSkipToCharacter ('\n'); + goto getNextChar; + } + else if (isKeyword (token, KEYWORD_NONE)) + token->type = TOKEN_IDENTIFIER; + else + token->type = TOKEN_KEYWORD; + } + break; + } +} + +/* + * Token parsing functions + */ + +/* + * static void addContext (tokenInfo* const parent, const tokenInfo* const child) + * { + * if (vStringLength (parent->string) > 0) + * { + * vStringCatS (parent->string, "."); + * } + * vStringCatS (parent->string, vStringValue(child->string)); + * vStringTerminate(parent->string); + * } + */ + +static void addToScope (tokenInfo* const token, vString* const extra) +{ + if (vStringLength (token->scope) > 0) + { + vStringCatS (token->scope, "."); + } + vStringCatS (token->scope, vStringValue(extra)); + vStringTerminate(token->scope); +} + +/* + * Scanning functions + */ + +static void findToken (tokenInfo *const token, const tokenType type) +{ + while (! 
isType (token, type)) + { + readToken (token); + } +} + +static void findCmdTerm (tokenInfo *const token, const boolean check_first) +{ + int begin_end_nest_lvl = token->begin_end_nest_lvl; + + if ( check_first ) + { + if ( isCmdTerm(token) ) + return; + } + do + { + readToken (token); + } while ( !isCmdTerm(token) && !isMatchedEnd(token, begin_end_nest_lvl) ); +} + +static void skipToMatched(tokenInfo *const token) +{ + int nest_level = 0; + tokenType open_token; + tokenType close_token; + + switch (token->type) + { + case TOKEN_OPEN_PAREN: + open_token = TOKEN_OPEN_PAREN; + close_token = TOKEN_CLOSE_PAREN; + break; + case TOKEN_OPEN_CURLY: + open_token = TOKEN_OPEN_CURLY; + close_token = TOKEN_CLOSE_CURLY; + break; + case TOKEN_OPEN_SQUARE: + open_token = TOKEN_OPEN_SQUARE; + close_token = TOKEN_CLOSE_SQUARE; + break; + default: + return; + } + + /* + * This routine skips past the matching closing token (the token after it is read). + * It will also handle nested tokens like the (, ) below. + * ( name varchar(30), text binary(10) ) + */ + + if (isType (token, open_token)) + { + nest_level++; + while (! (isType (token, close_token) && (nest_level == 0))) + { + readToken (token); + if (isType (token, open_token)) + { + nest_level++; + } + if (isType (token, close_token)) + { + if (nest_level > 0) + { + nest_level--; + } + } + } + readToken (token); + } +} + +static void copyToken (tokenInfo *const dest, tokenInfo *const src) +{ + dest->lineNumber = src->lineNumber; + dest->filePosition = src->filePosition; + dest->type = src->type; + dest->keyword = src->keyword; + vStringCopy(dest->string, src->string); + vStringCopy(dest->scope, src->scope); +} + +static void skipArgumentList (tokenInfo *const token) +{ + /* + * Other databases can have arguments with fully declared + * datatypes: + * ( name varchar(30), text binary(10) ) + * So we must check for nested open and closing parentheses + */ + + if (isType (token, TOKEN_OPEN_PAREN)) /* arguments? 
*/ + { + skipToMatched (token); + } +} + +static void parseSubProgram (tokenInfo *const token) +{ + tokenInfo *const name = newToken (); + vString * saveScope = vStringNew (); + + /* + * This must handle both prototypes and the body of + * the procedures. + * + * Prototype: + * FUNCTION func_name RETURN integer; + * PROCEDURE proc_name( parameters ); + * Procedure + * FUNCTION GET_ML_USERNAME RETURN VARCHAR2 + * IS + * BEGIN + * RETURN v_sync_user_id; + * END GET_ML_USERNAME; + * + * PROCEDURE proc_name( parameters ) + * IS + * BEGIN + * END; + * CREATE PROCEDURE proc_name( parameters ) + * EXTERNAL NAME ... ; + * CREATE PROCEDURE proc_name( parameters ) + * BEGIN + * END; + * + * CREATE FUNCTION f_GetClassName( + * IN @object VARCHAR(128) + * ,IN @code VARCHAR(128) + * ) + * RETURNS VARCHAR(200) + * DETERMINISTIC + * BEGIN + * + * IF( @object = 'user_state' ) THEN + * SET something = something; + * END IF; + * + * RETURN @name; + * END; + * + * Note, a Package adds scope to the items within. + * create or replace package demo_pkg is + * test_var number; + * function test_func return varchar2; + * function more.test_func2 return varchar2; + * end demo_pkg; + * So the tags generated here, contain the package name: + * demo_pkg.test_var + * demo_pkg.test_func + * demo_pkg.more.test_func2 + */ + const sqlKind kind = isKeyword (token, KEYWORD_function) ? + SQLTAG_FUNCTION : SQLTAG_PROCEDURE; + Assert (isKeyword (token, KEYWORD_function) || + isKeyword (token, KEYWORD_procedure)); + + vStringCopy(saveScope, token->scope); + readToken (token); + copyToken (name, token); + readToken (token); + + if (isType (token, TOKEN_PERIOD)) + { + /* + * If this is an Oracle package, then the token->scope should + * already be set. If this is the case, also add this value to the + * scope. + * If this is not an Oracle package, chances are the scope should be + * blank and the value just read is the OWNER or CREATOR of the + * function and should not be considered part of the scope. 
+ */ + if ( vStringLength(saveScope) > 0 ) + { + addToScope(token, name->string); + } + readToken (token); + copyToken (name, token); + readToken (token); + } + if (isType (token, TOKEN_OPEN_PAREN)) + { + /* Reads to the next token after the TOKEN_CLOSE_PAREN */ + skipArgumentList(token); + } + + if (kind == SQLTAG_FUNCTION) + { + if (isKeyword (token, KEYWORD_return) || isKeyword (token, KEYWORD_returns)) + { + /* Read datatype */ + readToken (token); + /* + * Read the token after the datatype, which could be the + * command terminator if this is a prototype + * or an open parenthesis + */ + readToken (token); + if (isType (token, TOKEN_OPEN_PAREN)) + { + /* Reads to the next token after the TOKEN_CLOSE_PAREN */ + skipArgumentList(token); + } + } + } + if( isCmdTerm (token) ) + { + makeSqlTag (name, SQLTAG_PROTOTYPE); + } + else + { + while (!(isKeyword (token, KEYWORD_is) || + isKeyword (token, KEYWORD_begin) || + isKeyword (token, KEYWORD_at) || + isKeyword (token, KEYWORD_internal) || + isKeyword (token, KEYWORD_external) || + isKeyword (token, KEYWORD_url) || + isType (token, TOKEN_EQUAL) || + isCmdTerm (token) + ) + ) + { + if ( isKeyword (token, KEYWORD_result) ) + { + readToken (token); + if (isType (token, TOKEN_OPEN_PAREN)) + { + /* Reads to the next token after the TOKEN_CLOSE_PAREN */ + skipArgumentList(token); + } + } else { + readToken (token); + } + } + if (isKeyword (token, KEYWORD_at) || + isKeyword (token, KEYWORD_url) || + isKeyword (token, KEYWORD_internal) || + isKeyword (token, KEYWORD_external) ) + { + addToScope(token, name->string); + if (isType (name, TOKEN_IDENTIFIER) || + isType (name, TOKEN_STRING) || + !isKeyword (token, KEYWORD_NONE) + ) + makeSqlTag (name, kind); + + vStringClear (token->scope); + } + if ( isType (token, TOKEN_EQUAL) ) + readToken (token); + + if ( isKeyword (token, KEYWORD_declare) ) + parseDeclare (token, FALSE); + + if (isKeyword (token, KEYWORD_is) || + isKeyword (token, KEYWORD_begin) ) + { + addToScope(token, name->string); + if (isType 
(name, TOKEN_IDENTIFIER) || + isType (name, TOKEN_STRING) || + !isKeyword (token, KEYWORD_NONE) + ) + makeSqlTag (name, kind); + + parseBlock (token, TRUE); + vStringClear (token->scope); + } + } + vStringCopy(token->scope, saveScope); + deleteToken (name); + vStringDelete(saveScope); +} + +static void parseRecord (tokenInfo *const token) +{ + /* + * Make it a bit forgiving, this is called from + * multiple functions, parseTable, parseType + */ + if (!isType (token, TOKEN_OPEN_PAREN)) + readToken (token); + + Assert (isType (token, TOKEN_OPEN_PAREN)); + do + { + if ( isType (token, TOKEN_COMMA) || isType (token, TOKEN_OPEN_PAREN) ) + readToken (token); + + /* + * Create table statements can end with various constraints + * which must be excluded from the SQLTAG_FIELD. + * create table t1 ( + * c1 integer, + * c2 char(30), + * c3 numeric(10,5), + * c4 integer, + * constraint whatever, + * primary key(c1), + * foreign key (), + * check () + * ) + */ + if (! (isKeyword(token, KEYWORD_primary) || + isKeyword(token, KEYWORD_references) || + isKeyword(token, KEYWORD_unique) || + isKeyword(token, KEYWORD_check) || + isKeyword(token, KEYWORD_constraint) || + isKeyword(token, KEYWORD_foreign) ) ) + { + if (isType (token, TOKEN_IDENTIFIER) || + isType (token, TOKEN_STRING)) + makeSqlTag (token, SQLTAG_FIELD); + } + + while (!(isType (token, TOKEN_COMMA) || + isType (token, TOKEN_CLOSE_PAREN) || + isType (token, TOKEN_OPEN_PAREN) + )) + { + readToken (token); + /* + * A table structure can look like this: + * create table t1 ( + * c1 integer, + * c2 char(30), + * c3 numeric(10,5), + * c4 integer + * ) + * We can't just look for a COMMA or CLOSE_PAREN + * since that will not deal with the numeric(10,5) + * case. So we need to skip the argument list + * when we find an open paren. + */ + if (isType (token, TOKEN_OPEN_PAREN)) + { + /* Reads to the next token after the TOKEN_CLOSE_PAREN */ + skipArgumentList(token); + } + } + } while (! 
isType (token, TOKEN_CLOSE_PAREN)); +} + +static void parseType (tokenInfo *const token) +{ + tokenInfo *const name = newToken (); + vString * saveScope = vStringNew (); + + vStringCopy(saveScope, token->scope); + /* If a scope has been set, add it to the name */ + addToScope (name, token->scope); + readToken (name); + if (isType (name, TOKEN_IDENTIFIER)) + { + readToken (token); + if (isKeyword (token, KEYWORD_is)) + { + readToken (token); + addToScope (token, name->string); + switch (token->keyword) + { + case KEYWORD_record: + case KEYWORD_object: + makeSqlTag (name, SQLTAG_RECORD); + parseRecord (token); + break; + + case KEYWORD_table: + makeSqlTag (name, SQLTAG_TABLE); + break; + + case KEYWORD_ref: + readToken (token); + if (isKeyword (token, KEYWORD_cursor)) + makeSqlTag (name, SQLTAG_CURSOR); + break; + + default: break; + } + vStringClear (token->scope); + } + } + vStringCopy(token->scope, saveScope); + deleteToken (name); + vStringDelete(saveScope); +} + +static void parseSimple (tokenInfo *const token, const sqlKind kind) +{ + /* This will simply make the tagname from the first word found */ + readToken (token); + if (isType (token, TOKEN_IDENTIFIER) || + isType (token, TOKEN_STRING)) + makeSqlTag (token, kind); +} + +static void parseDeclare (tokenInfo *const token, const boolean local) +{ + /* + * PL/SQL declares are of this format: + * IS|AS + * [declare] + * CURSOR curname ... + * varname1 datatype; + * varname2 datatype; + * varname3 datatype; + * begin + */ + + if (isKeyword (token, KEYWORD_declare)) + readToken (token); + while (! isKeyword (token, KEYWORD_begin) && ! 
isKeyword (token, KEYWORD_end)) + { + switch (token->keyword) + { + case KEYWORD_cursor: parseSimple (token, SQLTAG_CURSOR); break; + case KEYWORD_function: parseSubProgram (token); break; + case KEYWORD_procedure: parseSubProgram (token); break; + case KEYWORD_subtype: parseSimple (token, SQLTAG_SUBTYPE); break; + case KEYWORD_trigger: parseSimple (token, SQLTAG_TRIGGER); break; + case KEYWORD_type: parseType (token); break; + + default: + if (isType (token, TOKEN_IDENTIFIER)) + { + if (local) + { + makeSqlTag (token, SQLTAG_LOCAL_VARIABLE); + } + else + { + makeSqlTag (token, SQLTAG_VARIABLE); + } + } + break; + } + findToken (token, TOKEN_SEMICOLON); + readToken (token); + } +} + +static void parseDeclareANSI (tokenInfo *const token, const boolean local) +{ + tokenInfo *const type = newToken (); + /* + * ANSI declares are of this format: + * BEGIN + * DECLARE varname1 datatype; + * DECLARE varname2 datatype; + * ... + * + * This differ from PL/SQL where DECLARE preceeds the BEGIN block + * and the DECLARE keyword is not repeated. 
+ */ + while (isKeyword (token, KEYWORD_declare)) + { + readToken (token); + readToken (type); + + if (isKeyword (type, KEYWORD_cursor)) + makeSqlTag (token, SQLTAG_CURSOR); + else if (isKeyword (token, KEYWORD_local) && + isKeyword (type, KEYWORD_temporary)) + { + /* + * DECLARE LOCAL TEMPORARY TABLE table_name ( + * c1 int, + * c2 int + * ); + */ + readToken (token); + if (isKeyword (token, KEYWORD_table)) + { + readToken (token); + if (isType(token, TOKEN_IDENTIFIER) || + isType(token, TOKEN_STRING) ) + { + makeSqlTag (token, SQLTAG_TABLE); + } + } + } + else if (isType (token, TOKEN_IDENTIFIER) || + isType (token, TOKEN_STRING)) + { + if (local) + makeSqlTag (token, SQLTAG_LOCAL_VARIABLE); + else + makeSqlTag (token, SQLTAG_VARIABLE); + } + findToken (token, TOKEN_SEMICOLON); + readToken (token); + } + deleteToken (type); +} + +static void parseLabel (tokenInfo *const token) +{ + /* + * A label has this format: + * <> + * DECLARE + * v_senator VARCHAR2(100) := 'THURMOND, JESSE'; + * BEGIN + * IF total_contributions (v_senator, 'TOBACCO') > 25000 + * THEN + * <> + * DECLARE + * v_senator VARCHAR2(100) := 'WHATEVERIT, TAKES'; + * BEGIN + * ... 
+ */ + + Assert (isType (token, TOKEN_BLOCK_LABEL_BEGIN)); + readToken (token); + if (isType (token, TOKEN_IDENTIFIER)) + { + makeSqlTag (token, SQLTAG_BLOCK_LABEL); + readToken (token); /* read end of label */ + } +} + +static void parseStatements (tokenInfo *const token, const boolean exit_on_endif ) +{ + boolean isAnsi = TRUE; + boolean stmtTerm = FALSE; + do + { + + if (isType (token, TOKEN_BLOCK_LABEL_BEGIN)) + parseLabel (token); + else + { + switch (token->keyword) + { + case KEYWORD_exception: + /* + * EXCEPTION + * ; + * + * Where an exception handler could be: + * BEGIN + * WHEN OTHERS THEN + * x := x + 3; + * END; + * In this case we need to skip this keyword and + * move on to the next token without reading until + * TOKEN_SEMICOLON; + */ + readToken (token); + continue; + + case KEYWORD_when: + /* + * WHEN statements can be used in exception clauses + * and CASE statements. The CASE statement should skip + * these given below we skip over to an END statement. + * But for an exception clause, we can have: + * EXCEPTION + * WHEN OTHERS THEN + * BEGIN + * x := x + 3; + * END; + * If we skip to the TOKEN_SEMICOLON, we miss the begin + * of a nested BEGIN END block. So read the next token + * after the THEN and restart the LOOP. + */ + while (! isKeyword (token, KEYWORD_then)) + readToken (token); + + readToken (token); + continue; + + case KEYWORD_if: + /* + * We do not want to look for a ; since for an empty + * IF block, it would skip over the END. + * IF...THEN + * END IF; + * + * IF...THEN + * ELSE + * END IF; + * + * IF...THEN + * ELSEIF...THEN + * ELSE + * END IF; + * + * or non-ANSI + * IF ... + * BEGIN + * END + */ + while ( ! isKeyword (token, KEYWORD_then) && + ! isKeyword (token, KEYWORD_begin) ) + { + readToken (token); + } + + if( isKeyword (token, KEYWORD_begin ) ) + { + isAnsi = FALSE; + parseBlock(token, FALSE); + + /* + * Handle the non-Ansi IF blocks. 
+ * parseBlock consumes the END, so if the next + * token is a command terminator (like GO) + * we know we are done with this statement. + */ + if ( isCmdTerm (token) ) + stmtTerm = TRUE; + } + else + { + readToken (token); + + while( ! (isKeyword (token, KEYWORD_end ) || + isKeyword (token, KEYWORD_endif ) ) + ) + { + if ( isKeyword (token, KEYWORD_else) || + isKeyword (token, KEYWORD_elseif) ) + readToken (token); + + parseStatements (token, TRUE); + + if ( isCmdTerm(token) ) + readToken (token); + + } + + /* + * parseStatements returns when it finds an END, an IF + * should follow the END for ANSI anyway. + * IF...THEN + * END IF; + */ + if( isKeyword (token, KEYWORD_end ) ) + readToken (token); + + if( isKeyword (token, KEYWORD_if ) || isKeyword (token, KEYWORD_endif ) ) + { + readToken (token); + if ( isCmdTerm(token) ) + stmtTerm = TRUE; + } + else + { + /* + * Well we need to do something here. + * There are lots of different END statements + * END; + * END CASE; + * ENDIF; + * ENDCASE; + */ + } + } + break; + + case KEYWORD_loop: + case KEYWORD_case: + case KEYWORD_for: + /* + * LOOP... + * END LOOP; + * + * CASE + * WHEN '1' THEN + * END CASE; + * + * FOR loop_name AS cursor_name CURSOR FOR ... + * DO + * END FOR; + */ + if( isKeyword (token, KEYWORD_for ) ) + { + /* loop name */ + readToken (token); + /* AS */ + readToken (token); + + if ( ! isKeyword (token, KEYWORD_is) ) + { + /* + * If this is not an AS keyword (AS maps to KEYWORD_is) this is + * not a proper FOR statement and should + * simply be ignored + */ + return; + } + + while ( ! isKeyword (token, KEYWORD_do) ) + readToken (token); + } + + + readToken (token); + while( ! 
isKeyword (token, KEYWORD_end ) ) + { + /* + if ( isKeyword (token, KEYWORD_else) || + isKeyword (token, KEYWORD_elseif) ) + readToken (token); + */ + + parseStatements (token, FALSE); + + if ( isCmdTerm(token) ) + readToken (token); + } + + + if( isKeyword (token, KEYWORD_end ) ) + readToken (token); + + /* + * Typically ended with + * END LOOP [loop name]; + * END CASE + * END FOR [loop name]; + */ + if ( isKeyword (token, KEYWORD_loop) || + isKeyword (token, KEYWORD_case) || + isKeyword (token, KEYWORD_for) ) + readToken (token); + + if ( isCmdTerm(token) ) + stmtTerm = TRUE; + + break; + + case KEYWORD_create: + readToken (token); + parseKeywords(token); + break; + + case KEYWORD_declare: + case KEYWORD_begin: + parseBlock (token, TRUE); + break; + + case KEYWORD_end: + break; + + default: + readToken (token); + break; + } + /* + * Not all statements must end in a semi-colon + * begin + * if current publisher <> 'publish' then + * signal UE_FailStatement + * end if + * end; + * The last statement prior to an end ("signal" above) does + * not need a semi-colon, nor does the end if, since it is + * also the last statement prior to the end of the block. + * + * So we must read to the first semi-colon or an END block + */ + while ( ! stmtTerm && + ! ( isKeyword (token, KEYWORD_end) || + (isCmdTerm(token)) ) + ) + { + if ( isKeyword (token, KEYWORD_endif) && + exit_on_endif ) + return; + + if (isType (token, TOKEN_COLON) ) + { + /* + * A : can signal a loop name + * myloop: + * LOOP + * LEAVE myloop; + * END LOOP; + * Unfortunately, labels do not have a + * cmd terminator, therefore we have to check + * if the next token is a keyword and process + * it accordingly. 
+ */ + readToken (token); + if ( isKeyword (token, KEYWORD_loop) || + isKeyword (token, KEYWORD_while) || + isKeyword (token, KEYWORD_for) ) + /* parseStatements (token); */ + return; + } + + readToken (token); + + if (isType (token, TOKEN_OPEN_PAREN) || + isType (token, TOKEN_OPEN_CURLY) || + isType (token, TOKEN_OPEN_SQUARE) ) + skipToMatched (token); + + /* + * Since we know how to parse various statements + * if we detect them, parse them to completion + */ + if (isType (token, TOKEN_BLOCK_LABEL_BEGIN) || + isKeyword (token, KEYWORD_exception) || + isKeyword (token, KEYWORD_loop) || + isKeyword (token, KEYWORD_case) || + isKeyword (token, KEYWORD_for) || + isKeyword (token, KEYWORD_begin) ) + parseStatements (token, FALSE); + else if (isKeyword (token, KEYWORD_if)) + parseStatements (token, TRUE); + + } + } + /* + * We assumed earlier all statements ended with a command terminator. + * See comment above, now, only read if the current token + * is not a command terminator. + */ + if ( isCmdTerm(token) && ! stmtTerm ) + stmtTerm = TRUE; + + } while (! isKeyword (token, KEYWORD_end) && + ! (exit_on_endif && isKeyword (token, KEYWORD_endif) ) && + ! stmtTerm ); +} + +static void parseBlock (tokenInfo *const token, const boolean local) +{ + if (isType (token, TOKEN_BLOCK_LABEL_BEGIN)) + { + parseLabel (token); + readToken (token); + } + if (! isKeyword (token, KEYWORD_begin)) + { + readToken (token); + /* + * These are Oracle style declares which generally come + * between an IS/AS and BEGIN block. + */ + parseDeclare (token, local); + } + if (isKeyword (token, KEYWORD_begin)) + { + readToken (token); + /* + * Check for ANSI declarations which always follow + * a BEGIN statement. This routine will not advance + * the token if none are found. + */ + parseDeclareANSI (token, local); + token->begin_end_nest_lvl++; + while (! 
isKeyword (token, KEYWORD_end)) + { + parseStatements (token, FALSE); + + if ( isCmdTerm(token) ) + readToken (token); + } + token->begin_end_nest_lvl--; + + /* + * Read the next token (we will assume + * it is the command delimiter) + */ + readToken (token); + + /* + * Check if the END block is terminated + */ + if ( !isCmdTerm (token) ) + { + /* + * Not sure what to do here at the moment. + * I think the routine that calls parseBlock + * must expect the next token has already + * been read since it is possible this + * token is not a command delimiter. + */ + /* findCmdTerm (token, FALSE); */ + } + } +} + +static void parsePackage (tokenInfo *const token) +{ + /* + * Packages can be specified in a number of ways: + * CREATE OR REPLACE PACKAGE pkg_name AS + * or + * CREATE OR REPLACE PACKAGE owner.pkg_name AS + * or by specifying a package body + * CREATE OR REPLACE PACKAGE BODY pkg_name AS + * CREATE OR REPLACE PACKAGE BODY owner.pkg_name AS + */ + tokenInfo *const name = newToken (); + readToken (name); + if (isKeyword (name, KEYWORD_body)) + { + /* + * Ignore the BODY tag since we will process + * the body or prototypes in the same manner + */ + readToken (name); + } + /* Check for owner.pkg_name */ + while (! 
isKeyword (token, KEYWORD_is)) + { + readToken (token); + if ( isType(token, TOKEN_PERIOD) ) + { + readToken (name); + } + } + if (isKeyword (token, KEYWORD_is)) + { + if (isType (name, TOKEN_IDENTIFIER) || + isType (name, TOKEN_STRING)) + makeSqlTag (name, SQLTAG_PACKAGE); + addToScope (token, name->string); + parseBlock (token, FALSE); + vStringClear (token->scope); + } + findCmdTerm (token, FALSE); + deleteToken (name); +} + +/* + * Tags the table named in a CREATE TABLE statement and then skips to the + * command terminator. + */ +static void parseTable (tokenInfo *const token) +{ + tokenInfo *const name = newToken (); + + /* + * This deals with these formats: + * create table t1 (c1 int); + * create global temporary table t2 (c1 int); + * create table "t3" (c1 int); + * create table bob.t4 (c1 int); + * create table bob."t5" (c1 int); + * create table "bob"."t6" (c1 int); + * create table bob."t7" (c1 int); + * Proxy tables use this format: + * create existing table bob."t7" AT '...'; + * SQL Server and Sybase formats + * create table OnlyTable ( + * create table dbo.HasOwner ( + * create table [dbo].[HasOwnerSquare] ( + * create table master.dbo.HasDb ( + * create table master..HasDbNoOwner ( + * create table [master].dbo.[HasDbAndOwnerSquare] ( + * create table [master]..[HasDbNoOwnerSquare] ( + */ + + /* This could be a database, owner or table name */ + readToken (name); + if (isType (name, TOKEN_OPEN_SQUARE)) + { + readToken (name); + /* Read close square */ + readToken (token); + } + readToken (token); + if (isType (token, TOKEN_PERIOD)) + { + /* + * This could be an owner or table name.
+ * But this is also a special case since the table can be + * referenced with a blank owner: + * dbname..tablename + */ + readToken (name); + if (isType (name, TOKEN_OPEN_SQUARE)) + { + readToken (name); + /* Read close square */ + readToken (token); + } + /* Check if a blank name was provided */ + if (isType (name, TOKEN_PERIOD)) + { + readToken (name); + if (isType (name, TOKEN_OPEN_SQUARE)) + { + readToken (name); + /* Read close square */ + readToken (token); + } + } + readToken (token); + if (isType (token, TOKEN_PERIOD)) + { + /* This can only be the table name */ + readToken (name); + if (isType (name, TOKEN_OPEN_SQUARE)) + { + readToken (name); + /* Read close square */ + readToken (token); + } + readToken (token); + } + } + if (isType (token, TOKEN_OPEN_PAREN)) + { + if (isType (name, TOKEN_IDENTIFIER) || + isType (name, TOKEN_STRING)) + { + makeSqlTag (name, SQLTAG_TABLE); + vStringCopy(token->scope, name->string); + parseRecord (token); + vStringClear (token->scope); + } + } + else if (isKeyword (token, KEYWORD_at)) + { + if (isType (name, TOKEN_IDENTIFIER)) + { + makeSqlTag (name, SQLTAG_TABLE); + } + } + findCmdTerm (token, FALSE); + deleteToken (name); +} + +static void parseIndex (tokenInfo *const token) +{ + tokenInfo *const name = newToken (); + tokenInfo *const owner = newToken (); + + /* + * This deals with these formats + * create index i1 on t1(c1) create index "i2" on t1(c1) + * create virtual unique clustered index "i3" on t1(c1) + * create unique clustered index "i4" on t1(c1) + * create clustered index "i5" on t1(c1) + * create bitmap index "i6" on t1(c1) + */ + + readToken (name); + readToken (token); + if (isType (token, TOKEN_PERIOD)) + { + readToken (name); + readToken (token); + } + if ( isKeyword (token, KEYWORD_on) && + (isType (name, TOKEN_IDENTIFIER) || isType (name, TOKEN_STRING) ) ) + { + readToken (owner); + readToken (token); + if (isType (token, TOKEN_PERIOD)) + { + readToken (owner); + readToken (token); + } + 
addToScope(name, owner->string); + makeSqlTag (name, SQLTAG_INDEX); + } + findCmdTerm (token, FALSE); + deleteToken (name); + deleteToken (owner); +} + +static void parseEvent (tokenInfo *const token) +{ + tokenInfo *const name = newToken (); + + /* + * This deals with these formats + * create event e1 handler begin end; + * create event "e2" handler begin end; + * create event dba."e3" handler begin end; + * create event "dba"."e4" handler begin end; + */ + + readToken (name); + readToken (token); + if (isType (token, TOKEN_PERIOD)) + { + readToken (name); + } + while (! (isKeyword (token, KEYWORD_handler) || + (isType (token, TOKEN_SEMICOLON))) ) + { + readToken (token); + } + + if ( isKeyword (token, KEYWORD_handler) || + isType (token, TOKEN_SEMICOLON) ) + { + makeSqlTag (name, SQLTAG_EVENT); + } + + if (isKeyword (token, KEYWORD_handler)) + { + readToken (token); + if ( isKeyword (token, KEYWORD_begin) ) + { + parseBlock (token, TRUE); + } + findCmdTerm (token, TRUE); + } + deleteToken (name); +} + +static void parseTrigger (tokenInfo *const token) +{ + tokenInfo *const name = newToken (); + tokenInfo *const table = newToken (); + + /* + * This deals with these formats + * create or replace trigger tr1 begin end; + * create trigger "tr2" begin end; + * drop trigger "droptr1"; + * create trigger "tr3" CALL sp_something(); + * create trigger "owner"."tr4" begin end; + * create trigger "tr5" not valid; + * create trigger "tr6" begin end; + */ + + readToken (name); + readToken (token); + if (isType (token, TOKEN_PERIOD)) + { + readToken (name); + readToken (token); + } + + while ( !isKeyword (token, KEYWORD_on) && + !isCmdTerm (token) ) + { + readToken (token); + } + + /*if (! isType (token, TOKEN_SEMICOLON) ) */ + if (! isCmdTerm (token) ) + { + readToken (table); + readToken (token); + if (isType (token, TOKEN_PERIOD)) + { + readToken (table); + readToken (token); + } + + while (! 
(isKeyword (token, KEYWORD_begin) || + (isKeyword (token, KEYWORD_call)) || + ( isCmdTerm (token))) ) + { + if ( isKeyword (token, KEYWORD_declare) ) + { + addToScope(token, name->string); + parseDeclare(token, TRUE); + vStringClear(token->scope); + } + else + readToken (token); + } + + if ( isKeyword (token, KEYWORD_begin) || + isKeyword (token, KEYWORD_call) ) + { + addToScope(name, table->string); + makeSqlTag (name, SQLTAG_TRIGGER); + addToScope(token, table->string); + if ( isKeyword (token, KEYWORD_begin) ) + { + parseBlock (token, TRUE); + } + vStringClear(token->scope); + } + } + + findCmdTerm (token, TRUE); + deleteToken (name); + deleteToken (table); +} + +static void parsePublication (tokenInfo *const token) +{ + tokenInfo *const name = newToken (); + + /* + * This deals with these formats + * create or replace publication pu1 () + * create publication "pu2" () + * create publication dba."pu3" () + * create publication "dba"."pu4" () + */ + + readToken (name); + readToken (token); + if (isType (token, TOKEN_PERIOD)) + { + readToken (name); + readToken (token); + } + if (isType (token, TOKEN_OPEN_PAREN)) + { + if (isType (name, TOKEN_IDENTIFIER) || + isType (name, TOKEN_STRING)) + { + makeSqlTag (name, SQLTAG_PUBLICATION); + } + } + findCmdTerm (token, FALSE); + deleteToken (name); +} + +static void parseService (tokenInfo *const token) +{ + tokenInfo *const name = newToken (); + + /* + * This deals with these formats + * CREATE SERVICE s1 TYPE 'HTML' + * AUTHORIZATION OFF USER DBA AS + * SELECT * + * FROM SYS.SYSTABLE; + * CREATE SERVICE "s2" TYPE 'HTML' + * AUTHORIZATION OFF USER DBA AS + * CALL sp_Something(); + */ + + readToken (name); + readToken (token); + if (isKeyword (token, KEYWORD_type)) + { + if (isType (name, TOKEN_IDENTIFIER) || + isType (name, TOKEN_STRING)) + { + makeSqlTag (name, SQLTAG_SERVICE); + } + } + findCmdTerm (token, FALSE); + deleteToken (name); +} + +static void parseDomain (tokenInfo *const token) +{ + tokenInfo *const name = 
newToken (); + + /* + * This deals with these formats + * CREATE DOMAIN|DATATYPE [AS] your_name ...; + */ + + readToken (name); + if (isKeyword (name, KEYWORD_is)) + { + readToken (name); + } + readToken (token); + if (isType (name, TOKEN_IDENTIFIER) || + isType (name, TOKEN_STRING)) + { + makeSqlTag (name, SQLTAG_DOMAIN); + } + findCmdTerm (token, FALSE); + deleteToken (name); +} + +static void parseDrop (tokenInfo *const token) +{ + /* + * This deals with these formats + * DROP TABLE|PROCEDURE|DOMAIN|DATATYPE name; + * + * Just simply skip over these statements. + * They are often confused with PROCEDURE prototypes + * since the syntax is similar, this effectively deals with + * the issue for all types. + */ + + findCmdTerm (token, FALSE); +} + +static void parseVariable (tokenInfo *const token) +{ + tokenInfo *const name = newToken (); + + /* + * This deals with these formats + * create variable varname1 integer; + * create variable @varname2 integer; + * create variable "varname3" integer; + * drop variable @varname3; + */ + + readToken (name); + readToken (token); + if ( (isType (name, TOKEN_IDENTIFIER) || isType (name, TOKEN_STRING)) + && !isType (token, TOKEN_SEMICOLON) ) + { + makeSqlTag (name, SQLTAG_VARIABLE); + } + findCmdTerm (token, TRUE); + + deleteToken (name); +} + +static void parseSynonym (tokenInfo *const token) +{ + tokenInfo *const name = newToken (); + + /* + * This deals with these formats + * create synonym syn1 for ...; + * create synonym "syn2" for ...; + * The name is only tagged when the token right after it is FOR. + */ + + readToken (name); + readToken (token); + if ( (isType (name, TOKEN_IDENTIFIER) || isType (name, TOKEN_STRING)) + && isKeyword (token, KEYWORD_for) ) + { + makeSqlTag (name, SQLTAG_SYNONYM); + } + findCmdTerm (token, TRUE); + + deleteToken (name); +} + +static void parseView (tokenInfo *const token) +{ + tokenInfo *const name = newToken (); + + /* + * This deals with these formats + * create view
v1 AS ...; + * create view owner.v2 AS ...; + * create view v3 (c1, c2) AS ...; + * The name is only tagged when KEYWORD_is is reached before a semicolon + * (NOTE(review): AS presumably maps to KEYWORD_is in the keyword table -- confirm). + */ + + readToken (name); + readToken (token); + if (isType (token, TOKEN_PERIOD)) + { + readToken (name); + readToken (token); + } + if ( isType (token, TOKEN_OPEN_PAREN) ) + { + skipArgumentList(token); + + } + + while (!(isKeyword (token, KEYWORD_is) || + isType (token, TOKEN_SEMICOLON) + )) + { + readToken (token); + } + + if ( (isType (name, TOKEN_IDENTIFIER) || isType (name, TOKEN_STRING)) + && isKeyword (token, KEYWORD_is) ) + { + makeSqlTag (name, SQLTAG_VIEW); + } + + findCmdTerm (token, TRUE); + + deleteToken (name); +} + +static void parseMLTable (tokenInfo *const token) +{ + tokenInfo *const version = newToken (); + tokenInfo *const table = newToken (); + tokenInfo *const event = newToken (); + + /* + * This deals with these formats + * call dbo.ml_add_table_script( 'version', 'table_name', 'event', + * 'some SQL statement' + * ); + */ + + readToken (token); + if ( isType (token, TOKEN_OPEN_PAREN) ) + { + readToken (version); + readToken (token); + while (!(isType (token, TOKEN_COMMA) || + isType (token, TOKEN_CLOSE_PAREN) + )) + { + readToken (token); + } + + if (isType (token, TOKEN_COMMA)) + { + readToken (table); + readToken (token); + while (!(isType (token, TOKEN_COMMA) || + isType (token, TOKEN_CLOSE_PAREN) + )) + { + readToken (token); + } + + if (isType (token, TOKEN_COMMA)) + { + readToken (event); + + if (isType (version, TOKEN_STRING) && + isType (table, TOKEN_STRING) && + isType (event, TOKEN_STRING) ) + { + addToScope(version, table->string); + addToScope(version, event->string); + makeSqlTag (version, SQLTAG_MLTABLE); + } + } + if( !isType (token, TOKEN_CLOSE_PAREN) ) + findToken (token, TOKEN_CLOSE_PAREN); + } + } + + findCmdTerm (token, TRUE); + + deleteToken (version); + deleteToken (table); + deleteToken (event); +} + +static void parseMLConn (tokenInfo *const token) +{ + tokenInfo *const version = newToken
(); + tokenInfo *const event = newToken (); + + /* + * This deals with these formats + * call ml_add_connection_script( 'version', 'event', + * 'some SQL statement' + * ); + */ + + readToken (token); + if ( isType (token, TOKEN_OPEN_PAREN) ) + { + readToken (version); + readToken (token); + while (!(isType (token, TOKEN_COMMA) || + isType (token, TOKEN_CLOSE_PAREN) + )) + { + readToken (token); + } + + if (isType (token, TOKEN_COMMA)) + { + readToken (event); + + if (isType (version, TOKEN_STRING) && + isType (event, TOKEN_STRING) ) + { + addToScope(version, event->string); + makeSqlTag (version, SQLTAG_MLCONN); + } + } + if( !isType (token, TOKEN_CLOSE_PAREN) ) + findToken (token, TOKEN_CLOSE_PAREN); + + } + + findCmdTerm (token, TRUE); + + deleteToken (version); + deleteToken (event); +} + +static void parseMLProp (tokenInfo *const token) +{ + tokenInfo *const component = newToken (); + tokenInfo *const prop_set_name = newToken (); + tokenInfo *const prop_name = newToken (); + + /* + * This deals with these formats + * ml_add_property ( + * 'comp_name', + * 'prop_set_name', + * 'prop_name', + * 'prop_value' + * ) + */ + + readToken (token); + if ( isType (token, TOKEN_OPEN_PAREN) ) + { + readToken (component); + readToken (token); + while (!(isType (token, TOKEN_COMMA) || + isType (token, TOKEN_CLOSE_PAREN) + )) + { + readToken (token); + } + + if (isType (token, TOKEN_COMMA)) + { + readToken (prop_set_name); + readToken (token); + while (!(isType (token, TOKEN_COMMA) || + isType (token, TOKEN_CLOSE_PAREN) + )) + { + readToken (token); + } + + if (isType (token, TOKEN_COMMA)) + { + readToken (prop_name); + + if (isType (component, TOKEN_STRING) && + isType (prop_set_name, TOKEN_STRING) && + isType (prop_name, TOKEN_STRING) ) + { + addToScope(component, prop_set_name->string); + addToScope(component, prop_name->string); + makeSqlTag (component, SQLTAG_MLPROP); + } + } + if( !isType (token, TOKEN_CLOSE_PAREN) ) + findToken (token, TOKEN_CLOSE_PAREN); + } + } + + 
findCmdTerm (token, TRUE); + + deleteToken (component); + deleteToken (prop_set_name); + deleteToken (prop_name); +} + +static void parseComment (tokenInfo *const token) +{ + /* + * This deals with this statement: + * COMMENT TO PRESERVE FORMAT ON PROCEDURE "DBA"."test" IS + * {create PROCEDURE DBA."test"() + * BEGIN + * signal dave; + * END + * } + * ; + * The comment can contain anything between the CURLY + * braces + * COMMENT ON USER "admin" IS + * 'Administration Group' + * ; + * Or it could be a simple string with no curly braces + */ + while (! isKeyword (token, KEYWORD_is)) + { + readToken (token); + } + readToken (token); + if ( isType(token, TOKEN_OPEN_CURLY) ) + { + findToken (token, TOKEN_CLOSE_CURLY); + } + + findCmdTerm (token, TRUE); +} + + +static void parseKeywords (tokenInfo *const token) +{ + switch (token->keyword) + { + case KEYWORD_begin: parseBlock (token, FALSE); break; + case KEYWORD_comment: parseComment (token); break; + case KEYWORD_cursor: parseSimple (token, SQLTAG_CURSOR); break; + case KEYWORD_datatype: parseDomain (token); break; + case KEYWORD_declare: parseBlock (token, FALSE); break; + case KEYWORD_domain: parseDomain (token); break; + case KEYWORD_drop: parseDrop (token); break; + case KEYWORD_event: parseEvent (token); break; + case KEYWORD_function: parseSubProgram (token); break; + case KEYWORD_if: parseStatements (token, FALSE); break; + case KEYWORD_index: parseIndex (token); break; + case KEYWORD_ml_table: parseMLTable (token); break; + case KEYWORD_ml_table_lang: parseMLTable (token); break; + case KEYWORD_ml_table_dnet: parseMLTable (token); break; + case KEYWORD_ml_table_java: parseMLTable (token); break; + case KEYWORD_ml_table_chk: parseMLTable (token); break; + case KEYWORD_ml_conn: parseMLConn (token); break; + case KEYWORD_ml_conn_lang: parseMLConn (token); break; + case KEYWORD_ml_conn_dnet: parseMLConn (token); break; + case KEYWORD_ml_conn_java: parseMLConn (token); break; + case KEYWORD_ml_conn_chk: parseMLConn 
(token); break; + case KEYWORD_ml_prop: parseMLProp (token); break; + case KEYWORD_package: parsePackage (token); break; + case KEYWORD_procedure: parseSubProgram (token); break; + case KEYWORD_publication: parsePublication (token); break; + case KEYWORD_service: parseService (token); break; + case KEYWORD_subtype: parseSimple (token, SQLTAG_SUBTYPE); break; + case KEYWORD_synonym: parseSynonym (token); break; + case KEYWORD_table: parseTable (token); break; + case KEYWORD_trigger: parseTrigger (token); break; + case KEYWORD_type: parseType (token); break; + case KEYWORD_variable: parseVariable (token); break; + case KEYWORD_view: parseView (token); break; + default: break; + } +} + +static void parseSqlFile (tokenInfo *const token) +{ + do + { + readToken (token); + + if (isType (token, TOKEN_BLOCK_LABEL_BEGIN)) + parseLabel (token); + else + parseKeywords (token); + } while (! isKeyword (token, KEYWORD_end)); +} + +static void initialize (const langType language) +{ + Assert (sizeof (SqlKinds) / sizeof (SqlKinds [0]) == SQLTAG_COUNT); + Lang_sql = language; + buildSqlKeywordHash (); +} + +static void findSqlTags (void) +{ + tokenInfo *const token = newToken (); + exception_t exception = (exception_t) (setjmp (Exception)); + + while (exception == ExceptionNone) + parseSqlFile (token); + + deleteToken (token); +} + +extern parserDefinition* SqlParser (void) +{ + static const char *const extensions [] = { "sql", NULL }; + parserDefinition* def = parserNew ("SQL"); + def->kinds = SqlKinds; + def->kindCount = KIND_COUNT (SqlKinds); + def->extensions = extensions; + def->parser = findSqlTags; + def->initialize = initialize; + return def; +} + +/* vi:set tabstop=4 shiftwidth=4 noexpandtab: */ diff --git a/third_party/ctags/strlist.c b/third_party/ctags/strlist.c new file mode 100644 index 000000000..d56eb019a --- /dev/null +++ b/third_party/ctags/strlist.c @@ -0,0 +1,283 @@ +// clang-format off +/* +* $Id: strlist.c 443 2006-05-30 04:37:13Z darren $ +* +* Copyright (c) 
1999-2002, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains functions managing resizable string lists. +*/ + +/* +* INCLUDE FILES +*/ +#include "third_party/ctags/general.h" /* must always come first */ + +#include "libc/mem/alg.h" +#include "libc/str/str.h" +#ifdef HAVE_FNMATCH_H +#include "third_party/musl/fnmatch.h" +#endif + +#include "third_party/ctags/debug.h" +#include "third_party/ctags/read.h" +#include "third_party/ctags/routines.h" +#include "third_party/ctags/strlist.h" + +/* +* FUNCTION DEFINITIONS +*/ + +extern stringList *stringListNew (void) +{ + stringList* const result = xMalloc (1, stringList); + result->max = 0; + result->count = 0; + result->list = NULL; + return result; +} + +extern void stringListAdd (stringList *const current, vString *string) +{ + enum { incrementalIncrease = 10 }; + Assert (current != NULL); + if (current->list == NULL) + { + Assert (current->max == 0); + current->count = 0; + current->max = incrementalIncrease; + current->list = xMalloc (current->max, vString*); + } + else if (current->count == current->max) + { + current->max += incrementalIncrease; + current->list = xRealloc (current->list, current->max, vString*); + } + current->list [current->count++] = string; +} + +extern void stringListRemoveLast (stringList *const current) +{ + Assert (current != NULL); + Assert (current->count > 0); + --current->count; + current->list [current->count] = NULL; +} + +/* Combine list `from' into `current', deleting `from' */ +extern void stringListCombine ( + stringList *const current, stringList *const from) +{ + unsigned int i; + Assert (current != NULL); + Assert (from != NULL); + for (i = 0 ; i < from->count ; ++i) + { + stringListAdd (current, from->list [i]); + from->list [i] = NULL; + } + stringListDelete (from); +} + +extern stringList* stringListNewFromArgv (const char* const* const argv) +{ + stringList* const result = 
stringListNew (); + const char *const *p; + Assert (argv != NULL); + for (p = argv ; *p != NULL ; ++p) + stringListAdd (result, vStringNewInit (*p)); + return result; +} + +/* + * Reads fileName with readLine() and returns a new list holding every string + * that is still non-empty after vStringStripTrailing(). Returns NULL when the + * file cannot be opened. + */ +extern stringList* stringListNewFromFile (const char* const fileName) +{ + stringList* result = NULL; + FILE* const fp = fopen (fileName, "r"); + if (fp != NULL) + { + result = stringListNew (); + while (! feof (fp)) + { + vString* const str = vStringNew (); + readLine (str, fp); + vStringStripTrailing (str); + if (vStringLength (str) > 0) + stringListAdd (result, str); + else + vStringDelete (str); + } + /* the list is complete; close the stream so it is not leaked on every call */ + fclose (fp); + } + return result; +} + +extern unsigned int stringListCount (const stringList *const current) +{ + Assert (current != NULL); + return current->count; +} + +extern vString* stringListItem ( + const stringList *const current, const unsigned int indx) +{ + Assert (current != NULL); + return current->list [indx]; +} + +extern vString* stringListLast (const stringList *const current) +{ + Assert (current != NULL); + Assert (current->count > 0); + return current->list [current->count - 1]; +} + +extern void stringListClear (stringList *const current) +{ + unsigned int i; + Assert (current != NULL); + for (i = 0 ; i < current->count ; ++i) + { + vStringDelete (current->list [i]); + current->list [i] = NULL; + } + current->count = 0; +} + +extern void stringListDelete (stringList *const current) +{ + if (current != NULL) + { + if (current->list != NULL) + { + stringListClear (current); + eFree (current->list); + current->list = NULL; + } + current->max = 0; + current->count = 0; + eFree (current); + } +} + +static boolean compareString ( + const char *const string, vString *const itm) +{ + return (boolean) (strcmp (string, vStringValue (itm)) == 0); +} + +static boolean compareStringInsensitive ( + const char *const string, vString *const itm) +{ + return (boolean) (strcasecmp (string, vStringValue (itm)) == 0); +} + +static int stringListIndex ( + const stringList *const current, + const char *const
string, + boolean (*test)(const char *s, vString *const vs)) +{ + int result = -1; + unsigned int i; + Assert (current != NULL); + Assert (string != NULL); + Assert (test != NULL); + for (i = 0 ; result == -1 && i < current->count ; ++i) + if ((*test)(string, current->list [i])) + result = i; + return result; +} + +extern boolean stringListHas ( + const stringList *const current, const char *const string) +{ + boolean result = FALSE; + Assert (current != NULL); + result = stringListIndex (current, string, compareString) != -1; + return result; +} + +extern boolean stringListHasInsensitive ( + const stringList *const current, const char *const string) +{ + boolean result = FALSE; + Assert (current != NULL); + Assert (string != NULL); + result = stringListIndex (current, string, compareStringInsensitive) != -1; + return result; +} + +extern boolean stringListHasTest ( + const stringList *const current, boolean (*test)(const char *s)) +{ + boolean result = FALSE; + unsigned int i; + Assert (current != NULL); + for (i = 0 ; ! 
result && i < current->count ; ++i) + result = (*test)(vStringValue (current->list [i])); + return result; +} + +/* + * Removes the first entry of `current' that equals `extension' (compared + * case-insensitively when CASE_INSENSITIVE_FILENAMES is defined) and closes + * the gap it leaves. Returns TRUE when an entry was removed, FALSE otherwise. + * NOTE(review): the removed vString is not freed here although + * stringListClear() frees list items, so this looks like a leak -- confirm no + * caller keeps the pointer before adding a vStringDelete(). + */ +extern boolean stringListRemoveExtension ( + stringList* const current, const char* const extension) +{ + boolean result = FALSE; + int where; +#ifdef CASE_INSENSITIVE_FILENAMES + where = stringListIndex (current, extension, compareStringInsensitive); +#else + where = stringListIndex (current, extension, compareString); +#endif + if (where != -1) + { + /* only count - where - 1 entries follow `where'; moving one more would read list [count], past the live entries */ + memmove (current->list + where, current->list + where + 1, + (current->count - where - 1) * sizeof (*current->list)); + current->list [current->count - 1] = NULL; + --current->count; + result = TRUE; + } + return result; +} + +extern boolean stringListExtensionMatched ( + const stringList* const current, const char* const extension) +{ +#ifdef CASE_INSENSITIVE_FILENAMES + return stringListHasInsensitive (current, extension); +#else + return stringListHas (current, extension); +#endif +} + +static boolean fileNameMatched ( + const vString* const vpattern, const char* const fileName) +{ + const char* const pattern = vStringValue (vpattern); +#if defined (HAVE_FNMATCH) + return (boolean) (fnmatch (pattern, fileName, 0) == 0); +#elif defined (CASE_INSENSITIVE_FILENAMES) + return (boolean) (strcasecmp (pattern, fileName) == 0); +#else + return (boolean) (strcmp (pattern, fileName) == 0); +#endif +} + +extern boolean stringListFileMatched ( + const stringList* const current, const char* const fileName) +{ + boolean result = FALSE; + unsigned int i; + for (i = 0 ; ! result && i < stringListCount (current) ; ++i) + result = fileNameMatched (stringListItem (current, i), fileName); + return result; +} + +extern void stringListPrint (const stringList *const current) +{ + unsigned int i; + Assert (current != NULL); + for (i = 0 ; i < current->count ; ++i) + printf ("%s%s", (i > 0) ?
", " : "", vStringValue (current->list [i])); +} + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/third_party/ctags/strlist.h b/third_party/ctags/strlist.h new file mode 100644 index 000000000..74780878a --- /dev/null +++ b/third_party/ctags/strlist.h @@ -0,0 +1,55 @@ +// clang-format off +/* +* $Id: strlist.h 443 2006-05-30 04:37:13Z darren $ +* +* Copyright (c) 1999-2002, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* Defines external interface to resizable string lists. +*/ +#ifndef _STRLIST_H +#define _STRLIST_H + +/* +* INCLUDE FILES +*/ +#include "third_party/ctags/general.h" /* must always come first */ + +#include "third_party/ctags/vstring.h" + +/* +* DATA DECLARATIONS +*/ +typedef struct sStringList { + unsigned int max; + unsigned int count; + vString **list; +} stringList; + +/* +* FUNCTION PROTOTYPES +*/ +extern stringList *stringListNew (void); +extern void stringListAdd (stringList *const current, vString *string); +extern void stringListRemoveLast (stringList *const current); +extern void stringListCombine (stringList *const current, stringList *const from); +extern stringList* stringListNewFromArgv (const char* const* const list); +extern stringList* stringListNewFromFile (const char* const fileName); +extern void stringListClear (stringList *const current); +extern unsigned int stringListCount (const stringList *const current); +extern vString* stringListItem (const stringList *const current, const unsigned int indx); +extern vString* stringListLast (const stringList *const current); +extern void stringListDelete (stringList *const current); +extern boolean stringListHasInsensitive (const stringList *const current, const char *const string); +extern boolean stringListHas (const stringList *const current, const char *const string); +extern boolean stringListHasTest (const stringList *const current, boolean (*test)(const char *s)); +extern boolean 
stringListRemoveExtension (stringList* const current, const char* const extension); +extern boolean stringListExtensionMatched (const stringList* const list, const char* const extension); +extern boolean stringListFileMatched (const stringList* const list, const char* const str); +extern void stringListPrint (const stringList *const current); + +#endif /* _STRLIST_H */ + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/third_party/ctags/tcl.c b/third_party/ctags/tcl.c new file mode 100644 index 000000000..c32e12015 --- /dev/null +++ b/third_party/ctags/tcl.c @@ -0,0 +1,118 @@ +// clang-format off +/* +* $Id: tcl.c 443 2006-05-30 04:37:13Z darren $ +* +* Copyright (c) 2000-2003, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains functions for generating tags for TCL scripts. +*/ + +/* +* INCLUDE FILES +*/ +#include "third_party/ctags/general.h" /* must always come first */ + +#include "libc/mem/alg.h" +#include "libc/str/str.h" + +#include "third_party/ctags/parse.h" +#include "third_party/ctags/read.h" +#include "third_party/ctags/vstring.h" + +/* +* DATA DEFINITIONS +*/ +typedef enum { + K_CLASS, K_METHOD, K_PROCEDURE +} tclKind; + +static kindOption TclKinds [] = { + { TRUE, 'c', "class", "classes" }, + { TRUE, 'm', "method", "methods" }, + { TRUE, 'p', "procedure", "procedures" } +}; + +/* +* FUNCTION DEFINITIONS +*/ + +static const unsigned char *makeTclTag ( + const unsigned char *cp, + vString *const name, + const tclKind kind) +{ + vStringClear (name); + while ((int) *cp != '\0' && ! 
isspace ((int) *cp)) + { + vStringPut (name, (int) *cp); + ++cp; + } + vStringTerminate (name); + makeSimpleTag (name, TclKinds, kind); + return cp; +} + +static boolean match (const unsigned char *line, const char *word) +{ + return (boolean) (strncmp ((const char*) line, word, strlen (word)) == 0); +} + +static void findTclTags (void) +{ + vString *name = vStringNew (); + const unsigned char *line; + + while ((line = fileReadLine ()) != NULL) + { + const unsigned char *cp; + + while (isspace (line [0])) + ++line; + + if (line [0] == '\0' || line [0] == '#') + continue; + + /* read first word */ + for (cp = line ; *cp != '\0' && ! isspace ((int) *cp) ; ++cp) + ; + if (! isspace ((int) *cp)) + continue; + while (isspace ((int) *cp)) + ++cp; + /* Now `line' points at first word and `cp' points at next word */ + + if (match (line, "proc")) + cp = makeTclTag (cp, name, K_PROCEDURE); + else if (match (line, "class") || match (line, "itcl::class")) + cp = makeTclTag (cp, name, K_CLASS); + else if (match (line, "public") || + match (line, "protected") || + match (line, "private")) + { + if (match (cp, "method")) + { + cp += 6; + while (isspace ((int) *cp)) + ++cp; + cp = makeTclTag (cp, name, K_METHOD); + } + } + } + vStringDelete (name); +} + +extern parserDefinition* TclParser (void) +{ + static const char *const extensions [] = { "tcl", "tk", "wish", "itcl", NULL }; + parserDefinition* def = parserNew ("Tcl"); + def->kinds = TclKinds; + def->kindCount = KIND_COUNT (TclKinds); + def->extensions = extensions; + def->parser = findTclTags; + return def; +} + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/third_party/ctags/tex.c b/third_party/ctags/tex.c new file mode 100644 index 000000000..b5e6304b0 --- /dev/null +++ b/third_party/ctags/tex.c @@ -0,0 +1,523 @@ +// clang-format off +/* + * $Id: tex.c 666 2008-05-15 17:47:31Z dfishburn $ + * + * Copyright (c) 2008, David Fishburn + * + * This source code is released for free distribution under the terms of the + * GNU 
General Public License. + * + * This module contains functions for generating tags for TeX language files. + * + * Tex language reference: + * http://en.wikibooks.org/wiki/TeX#The_Structure_of_TeX + */ + +/* + * INCLUDE FILES + */ +#include "third_party/ctags/general.h" /* must always come first */ +#include "libc/str/str.h" /* to define isalpha () */ +#include "libc/runtime/runtime.h" +#ifdef DEBUG +#include "libc/calls/calls.h" +#include "libc/calls/dprintf.h" +#include "libc/calls/weirdtypes.h" +#include "libc/fmt/fmt.h" +#include "libc/mem/fmt.h" +#include "libc/stdio/stdio.h" +#include "libc/stdio/temp.h" +#include "third_party/musl/tempnam.h" +#endif + +#include "third_party/ctags/debug.h" +#include "third_party/ctags/entry.h" +#include "third_party/ctags/keyword.h" +#include "third_party/ctags/parse.h" +#include "third_party/ctags/read.h" +#include "third_party/ctags/routines.h" +#include "third_party/ctags/vstring.h" + +/* + * MACROS + */ +#define isType(token,t) (boolean) ((token)->type == (t)) +#define isKeyword(token,k) (boolean) ((token)->keyword == (k)) + +/* + * DATA DECLARATIONS + */ + +typedef enum eException { ExceptionNone, ExceptionEOF } exception_t; + +/* + * Used to specify type of keyword. + */ +typedef enum eKeywordId { + KEYWORD_NONE = -1, + KEYWORD_chapter, + KEYWORD_section, + KEYWORD_subsection, + KEYWORD_subsubsection, + KEYWORD_part, + KEYWORD_paragraph, + KEYWORD_subparagraph, + KEYWORD_include +} keywordId; + +/* Used to determine whether keyword is valid for the token language and + * what its ID is. 
+ */ +typedef struct sKeywordDesc { + const char *name; + keywordId id; +} keywordDesc; + +typedef enum eTokenType { + TOKEN_UNDEFINED, + TOKEN_CHARACTER, + TOKEN_CLOSE_PAREN, + TOKEN_COMMA, + TOKEN_KEYWORD, + TOKEN_OPEN_PAREN, + TOKEN_IDENTIFIER, + TOKEN_STRING, + TOKEN_OPEN_CURLY, + TOKEN_CLOSE_CURLY, + TOKEN_OPEN_SQUARE, + TOKEN_CLOSE_SQUARE, + TOKEN_QUESTION_MARK, + TOKEN_STAR +} tokenType; + +typedef struct sTokenInfo { + tokenType type; + keywordId keyword; + vString * string; + vString * scope; + unsigned long lineNumber; + fpos_t filePosition; +} tokenInfo; + +/* + * DATA DEFINITIONS + */ + +static langType Lang_js; + +static jmp_buf Exception; + +typedef enum { + TEXTAG_CHAPTER, + TEXTAG_SECTION, + TEXTAG_SUBSECTION, + TEXTAG_SUBSUBSECTION, + TEXTAG_PART, + TEXTAG_PARAGRAPH, + TEXTAG_SUBPARAGRAPH, + TEXTAG_INCLUDE, + TEXTAG_COUNT +} texKind; + +static kindOption TexKinds [] = { + { TRUE, 'c', "chapter", "chapters" }, + { TRUE, 's', "section", "sections" }, + { TRUE, 'u', "subsection", "subsections" }, + { TRUE, 'b', "subsubsection", "subsubsections" }, + { TRUE, 'p', "part", "parts" }, + { TRUE, 'P', "paragraph", "paragraphs" }, + { TRUE, 'G', "subparagraph", "subparagraphs" }, + { TRUE, 'i', "include", "includes" } +}; + +static const keywordDesc TexKeywordTable [] = { + /* keyword keyword ID */ + { "chapter", KEYWORD_chapter }, + { "section", KEYWORD_section }, + { "subsection", KEYWORD_subsection }, + { "subsubsection", KEYWORD_subsubsection }, + { "part", KEYWORD_part }, + { "paragraph", KEYWORD_paragraph }, + { "subparagraph", KEYWORD_subparagraph }, + { "include", KEYWORD_include } +}; + +/* + * FUNCTION DEFINITIONS + */ + +static boolean isIdentChar (const int c) +{ + return (boolean) + (isalpha (c) || isdigit (c) || c == '$' || + c == '_' || c == '#' || c == '-' || c == '.'); +} + +static void buildTexKeywordHash (void) +{ + const size_t count = sizeof (TexKeywordTable) / + sizeof (TexKeywordTable [0]); + size_t i; + for (i = 0 ; i < count ; ++i) + 
{ + const keywordDesc* const p = &TexKeywordTable [i]; + addKeyword (p->name, Lang_js, (int) p->id); + } +} + +static tokenInfo *newToken (void) +{ + tokenInfo *const token = xMalloc (1, tokenInfo); + + token->type = TOKEN_UNDEFINED; + token->keyword = KEYWORD_NONE; + token->string = vStringNew (); + token->scope = vStringNew (); + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + + return token; +} + +static void deleteToken (tokenInfo *const token) +{ + vStringDelete (token->string); + vStringDelete (token->scope); + eFree (token); +} + +/* + * Tag generation functions + */ + +static void makeConstTag (tokenInfo *const token, const texKind kind) +{ + if (TexKinds [kind].enabled ) + { + const char *const name = vStringValue (token->string); + tagEntryInfo e; + initTagEntry (&e, name); + + e.lineNumber = token->lineNumber; + e.filePosition = token->filePosition; + e.kindName = TexKinds [kind].name; + e.kind = TexKinds [kind].letter; + + makeTagEntry (&e); + } +} + +static void makeTexTag (tokenInfo *const token, texKind kind) +{ + vString * fulltag; + + if (TexKinds [kind].enabled) + { + /* + * If a scope has been added to the token, change the token + * string to include the scope when making the tag. + */ + if ( vStringLength (token->scope) > 0 ) + { + fulltag = vStringNew (); + vStringCopy (fulltag, token->scope); + vStringCatS (fulltag, "."); + vStringCatS (fulltag, vStringValue (token->string)); + vStringTerminate (fulltag); + vStringCopy (token->string, fulltag); + vStringDelete (fulltag); + } + makeConstTag (token, kind); + } +} + +/* + * Parsing functions + */ + +static void parseString (vString *const string, const int delimiter) +{ + boolean end = FALSE; + while (! end) + { + int c = fileGetc (); + if (c == EOF) + end = TRUE; + else if (c == '\\') + { + c = fileGetc(); /* This maybe a ' or ". 
*/ + vStringPut (string, c); + } + else if (c == delimiter) + end = TRUE; + else + vStringPut (string, c); + } + vStringTerminate (string); +} + +/* + * Read a C identifier beginning with "firstChar" and places it into + * "name". + */ +static void parseIdentifier (vString *const string, const int firstChar) +{ + int c = firstChar; + Assert (isIdentChar (c)); + do + { + vStringPut (string, c); + c = fileGetc (); + } while (isIdentChar (c)); + + vStringTerminate (string); + if (!isspace (c)) + fileUngetc (c); /* unget non-identifier character */ +} + +static void readToken (tokenInfo *const token) +{ + int c; + + token->type = TOKEN_UNDEFINED; + token->keyword = KEYWORD_NONE; + vStringClear (token->string); + +getNextChar: + do + { + c = fileGetc (); + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + } + while (c == '\t' || c == ' ' || c == '\n'); + + switch (c) + { + case EOF: longjmp (Exception, (int)ExceptionEOF); break; + case '(': token->type = TOKEN_OPEN_PAREN; break; + case ')': token->type = TOKEN_CLOSE_PAREN; break; + case ',': token->type = TOKEN_COMMA; break; + case '{': token->type = TOKEN_OPEN_CURLY; break; + case '}': token->type = TOKEN_CLOSE_CURLY; break; + case '[': token->type = TOKEN_OPEN_SQUARE; break; + case ']': token->type = TOKEN_CLOSE_SQUARE; break; + case '*': token->type = TOKEN_STAR; break; + + case '\'': + case '"': + token->type = TOKEN_STRING; + parseString (token->string, c); + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + break; + + case '\\': + /* + * All Tex tags start with a backslash. + * Check if the next character is an alpha character + * else it is not a potential tex tag. + */ + c = fileGetc (); + if (! 
isalpha (c)) + fileUngetc (c); + else + { + parseIdentifier (token->string, c); + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + token->keyword = analyzeToken (token->string, Lang_js); + if (isKeyword (token, KEYWORD_NONE)) + token->type = TOKEN_IDENTIFIER; + else + token->type = TOKEN_KEYWORD; + } + break; + + case '%': + fileSkipToCharacter ('\n'); /* % are single line comments */ + goto getNextChar; + break; + + default: + if (! isIdentChar (c)) + token->type = TOKEN_UNDEFINED; + else + { + parseIdentifier (token->string, c); + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + token->type = TOKEN_IDENTIFIER; + } + break; + } +} + +static void copyToken (tokenInfo *const dest, tokenInfo *const src) +{ + dest->lineNumber = src->lineNumber; + dest->filePosition = src->filePosition; + dest->type = src->type; + dest->keyword = src->keyword; + vStringCopy (dest->string, src->string); + vStringCopy (dest->scope, src->scope); +} + +/* + * Scanning functions + */ + +static boolean parseTag (tokenInfo *const token, texKind kind) +{ + tokenInfo *const name = newToken (); + vString * fullname; + boolean useLongName = TRUE; + + fullname = vStringNew (); + vStringClear (fullname); + + /* + * Tex tags are of these formats: + * \keyword{any number of words} + * \keyword[short desc]{any number of words} + * \keyword*[short desc]{any number of words} + * + * When a keyword is found, loop through all words within + * the curly braces for the tag name. + */ + + if (isType (token, TOKEN_KEYWORD)) + { + copyToken (name, token); + readToken (token); + } + + if (isType (token, TOKEN_OPEN_SQUARE)) + { + useLongName = FALSE; + + readToken (token); + while (! 
isType (token, TOKEN_CLOSE_SQUARE) ) + { + if (isType (token, TOKEN_IDENTIFIER)) + { + if (fullname->length > 0) + vStringCatS (fullname, " "); + vStringCatS (fullname, vStringValue (token->string)); + } + readToken (token); + } + vStringTerminate (fullname); + vStringCopy (name->string, fullname); + makeTexTag (name, kind); + } + + if (isType (token, TOKEN_STAR)) + { + readToken (token); + } + + if (isType (token, TOKEN_OPEN_CURLY)) + { + readToken (token); + while (! isType (token, TOKEN_CLOSE_CURLY) ) + { + /* if (isType (token, TOKEN_IDENTIFIER) && useLongName) */ + if (useLongName) + { + if (fullname->length > 0) + vStringCatS (fullname, " "); + vStringCatS (fullname, vStringValue (token->string)); + } + readToken (token); + } + if (useLongName) + { + vStringTerminate (fullname); + vStringCopy (name->string, fullname); + makeTexTag (name, kind); + } + } + + deleteToken (name); + vStringDelete (fullname); + return TRUE; +} + +static void parseTexFile (tokenInfo *const token) +{ + do + { + readToken (token); + + if (isType (token, TOKEN_KEYWORD)) + { + switch (token->keyword) + { + case KEYWORD_chapter: + parseTag (token, TEXTAG_CHAPTER); + break; + case KEYWORD_section: + parseTag (token, TEXTAG_SECTION); + break; + case KEYWORD_subsection: + parseTag (token, TEXTAG_SUBSECTION); + break; + case KEYWORD_subsubsection: + parseTag (token, TEXTAG_SUBSUBSECTION); + break; + case KEYWORD_part: + parseTag (token, TEXTAG_PART); + break; + case KEYWORD_paragraph: + parseTag (token, TEXTAG_PARAGRAPH); + break; + case KEYWORD_subparagraph: + parseTag (token, TEXTAG_SUBPARAGRAPH); + break; + case KEYWORD_include: + parseTag (token, TEXTAG_INCLUDE); + break; + default: + break; + } + } + } while (TRUE); +} + +static void initialize (const langType language) +{ + Assert (sizeof (TexKinds) / sizeof (TexKinds [0]) == TEXTAG_COUNT); + Lang_js = language; + buildTexKeywordHash (); +} + +static void findTexTags (void) +{ + tokenInfo *const token = newToken (); + exception_t 
exception; + + exception = (exception_t) (setjmp (Exception)); + while (exception == ExceptionNone) + parseTexFile (token); + + deleteToken (token); +} + +/* Create parser definition stucture */ +extern parserDefinition* TexParser (void) +{ + static const char *const extensions [] = { "tex", NULL }; + parserDefinition *const def = parserNew ("Tex"); + def->extensions = extensions; + /* + * New definitions for parsing instead of regex + */ + def->kinds = TexKinds; + def->kindCount = KIND_COUNT (TexKinds); + def->parser = findTexTags; + def->initialize = initialize; + + return def; +} +/* vi:set tabstop=4 shiftwidth=4 noexpandtab: */ diff --git a/third_party/ctags/verilog.c b/third_party/ctags/verilog.c new file mode 100644 index 000000000..7cae04426 --- /dev/null +++ b/third_party/ctags/verilog.c @@ -0,0 +1,343 @@ +// clang-format off +/* +* $Id: verilog.c 753 2010-02-27 17:53:32Z elliotth $ +* +* Copyright (c) 2003, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains functions for generating tags for the Verilog HDL +* (Hardware Description Language). 
+* +* Language definition documents: +* http://www.eg.bucknell.edu/~cs320/verilog/verilog-manual.html +* http://www.sutherland-hdl.com/on-line_ref_guide/vlog_ref_top.html +* http://www.verilog.com/VerilogBNF.html +* http://eesun.free.fr/DOC/VERILOG/verilog_manual1.html +*/ + +/* + * INCLUDE FILES + */ +#include "third_party/ctags/general.h" /* must always come first */ + +#include "libc/mem/alg.h" +#include "libc/str/str.h" +#include "libc/runtime/runtime.h" + +#include "third_party/ctags/debug.h" +#include "third_party/ctags/get.h" +#include "third_party/ctags/keyword.h" +#include "third_party/ctags/parse.h" +#include "third_party/ctags/read.h" +#include "third_party/ctags/vstring.h" + +/* + * DATA DECLARATIONS + */ +typedef enum eException { ExceptionNone, ExceptionEOF } exception_t; + +typedef enum { + K_UNDEFINED = -1, + K_CONSTANT, + K_EVENT, + K_FUNCTION, + K_MODULE, + K_NET, + K_PORT, + K_REGISTER, + K_TASK +} verilogKind; + +typedef struct { + const char *keyword; + verilogKind kind; +} keywordAssoc; + +/* + * DATA DEFINITIONS + */ +static int Ungetc; +static int Lang_verilog; +static jmp_buf Exception; + +static kindOption VerilogKinds [] = { + { TRUE, 'c', "constant", "constants (define, parameter, specparam)" }, + { TRUE, 'e', "event", "events" }, + { TRUE, 'f', "function", "functions" }, + { TRUE, 'm', "module", "modules" }, + { TRUE, 'n', "net", "net data types" }, + { TRUE, 'p', "port", "ports" }, + { TRUE, 'r', "register", "register data types" }, + { TRUE, 't', "task", "tasks" } +}; + +static keywordAssoc VerilogKeywordTable [] = { + { "`define", K_CONSTANT }, + { "event", K_EVENT }, + { "function", K_FUNCTION }, + { "inout", K_PORT }, + { "input", K_PORT }, + { "integer", K_REGISTER }, + { "module", K_MODULE }, + { "output", K_PORT }, + { "parameter", K_CONSTANT }, + { "real", K_REGISTER }, + { "realtime", K_REGISTER }, + { "reg", K_REGISTER }, + { "specparam", K_CONSTANT }, + { "supply0", K_NET }, + { "supply1", K_NET }, + { "task", K_TASK }, + { 
"time", K_REGISTER }, + { "tri0", K_NET }, + { "tri1", K_NET }, + { "triand", K_NET }, + { "tri", K_NET }, + { "trior", K_NET }, + { "trireg", K_NET }, + { "wand", K_NET }, + { "wire", K_NET }, + { "wor", K_NET } +}; + +/* + * FUNCTION DEFINITIONS + */ + +static void initialize (const langType language) +{ + size_t i; + const size_t count = + sizeof (VerilogKeywordTable) / sizeof (VerilogKeywordTable [0]); + Lang_verilog = language; + for (i = 0 ; i < count ; ++i) + { + const keywordAssoc* const p = &VerilogKeywordTable [i]; + addKeyword (p->keyword, language, (int) p->kind); + } +} + +static void vUngetc (int c) +{ + Assert (Ungetc == '\0'); + Ungetc = c; +} + +static int vGetc (void) +{ + int c; + if (Ungetc == '\0') + c = fileGetc (); + else + { + c = Ungetc; + Ungetc = '\0'; + } + if (c == '/') + { + int c2 = fileGetc (); + if (c2 == EOF) + longjmp (Exception, (int) ExceptionEOF); + else if (c2 == '/') /* strip comment until end-of-line */ + { + do + c = fileGetc (); + while (c != '\n' && c != EOF); + } + else if (c2 == '*') /* strip block comment */ + { + c = skipOverCComment(); + } + else + { + fileUngetc (c2); + } + } + else if (c == '"') /* strip string contents */ + { + int c2; + do + c2 = fileGetc (); + while (c2 != '"' && c2 != EOF); + c = '@'; + } + if (c == EOF) + longjmp (Exception, (int) ExceptionEOF); + return c; +} + +static boolean isIdentifierCharacter (const int c) +{ + return (boolean)(isalnum (c) || c == '_' || c == '`'); +} + +static int skipWhite (int c) +{ + while (isspace (c)) + c = vGetc (); + return c; +} + +static int skipPastMatch (const char *const pair) +{ + const int begin = pair [0], end = pair [1]; + int matchLevel = 1; + int c; + do + { + c = vGetc (); + if (c == begin) + ++matchLevel; + else if (c == end) + --matchLevel; + } + while (matchLevel > 0); + return vGetc (); +} + +static boolean readIdentifier (vString *const name, int c) +{ + vStringClear (name); + if (isIdentifierCharacter (c)) + { + while (isIdentifierCharacter 
(c)) + { + vStringPut (name, c); + c = vGetc (); + } + vUngetc (c); + vStringTerminate (name); + } + return (boolean)(name->length > 0); +} + +static void tagNameList (const verilogKind kind, int c) +{ + vString *name = vStringNew (); + boolean repeat; + Assert (isIdentifierCharacter (c)); + do + { + repeat = FALSE; + if (isIdentifierCharacter (c)) + { + readIdentifier (name, c); + makeSimpleTag (name, VerilogKinds, kind); + } + else + break; + c = skipWhite (vGetc ()); + if (c == '[') + c = skipPastMatch ("[]"); + c = skipWhite (c); + if (c == '=') + { + c = skipWhite (vGetc ()); + if (c == '{') + skipPastMatch ("{}"); + else + { + do + c = vGetc (); + while (c != ',' && c != ';'); + } + } + if (c == ',') + { + c = skipWhite (vGetc ()); + repeat = TRUE; + } + else + repeat = FALSE; + } while (repeat); + vStringDelete (name); + vUngetc (c); +} + +static void findTag (vString *const name) +{ + const verilogKind kind = (verilogKind) lookupKeyword (vStringValue (name), Lang_verilog); + if (kind == K_CONSTANT && vStringItem (name, 0) == '`') + { + /* Bug #961001: Verilog compiler directives are line-based. */ + int c = skipWhite (vGetc ()); + readIdentifier (name, c); + makeSimpleTag (name, VerilogKinds, kind); + /* Skip the rest of the line. */ + do { + c = vGetc(); + } while (c != '\n'); + vUngetc (c); + } + else if (kind != K_UNDEFINED) + { + int c = skipWhite (vGetc ()); + + /* Many keywords can have bit width. 
+ * reg [3:0] net_name; + * inout [(`DBUSWIDTH-1):0] databus; + */ + if (c == '(') + c = skipPastMatch ("()"); + c = skipWhite (c); + if (c == '[') + c = skipPastMatch ("[]"); + c = skipWhite (c); + if (c == '#') + { + c = vGetc (); + if (c == '(') + c = skipPastMatch ("()"); + } + c = skipWhite (c); + if (isIdentifierCharacter (c)) + tagNameList (kind, c); + } +} + +static void findVerilogTags (void) +{ + vString *const name = vStringNew (); + volatile boolean newStatement = TRUE; + volatile int c = '\0'; + exception_t exception = (exception_t) setjmp (Exception); + + if (exception == ExceptionNone) while (c != EOF) + { + c = vGetc (); + switch (c) + { + case ';': + case '\n': + newStatement = TRUE; + break; + + case ' ': + case '\t': + break; + + default: + if (newStatement && readIdentifier (name, c)) + findTag (name); + newStatement = FALSE; + break; + } + } + vStringDelete (name); +} + +extern parserDefinition* VerilogParser (void) +{ + static const char *const extensions [] = { "v", NULL }; + parserDefinition* def = parserNew ("Verilog"); + def->kinds = VerilogKinds; + def->kindCount = KIND_COUNT (VerilogKinds); + def->extensions = extensions; + def->parser = findVerilogTags; + def->initialize = initialize; + return def; +} + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/third_party/ctags/vhdl.c b/third_party/ctags/vhdl.c new file mode 100644 index 000000000..00d04f4df --- /dev/null +++ b/third_party/ctags/vhdl.c @@ -0,0 +1,837 @@ +// clang-format off +/* +* $Id: vhdl.c 652 2008-04-18 03:51:47Z elliotth $ +* +* Copyright (c) 2008, Nicolas Vincent +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains functions for generating tags for VHDL files. 
+*/ + +/* + * INCLUDE FILES + */ +#include "third_party/ctags/general.h" /* must always come first */ + +#include "libc/str/str.h" /* to define isalpha () */ +#include "libc/mem/alg.h" +#include "libc/str/str.h" +#include "libc/runtime/runtime.h" + +#include "third_party/ctags/debug.h" +#include "third_party/ctags/entry.h" +#include "third_party/ctags/keyword.h" +#include "third_party/ctags/parse.h" +#include "third_party/ctags/read.h" +#include "third_party/ctags/routines.h" +#include "third_party/ctags/vstring.h" + +/* + * MACROS + */ +#define isType(token,t) (boolean) ((token)->type == (t)) +#define isKeyword(token,k) (boolean) ((token)->keyword == (k)) + +/* + * DATA DECLARATIONS + */ +typedef enum eException { ExceptionNone, ExceptionEOF } exception_t; + +/* + * Used to specify type of keyword. + */ +typedef enum eKeywordId { + KEYWORD_NONE = -1, + KEYWORD_ABS, + KEYWORD_ACCESS, + KEYWORD_AFTER, + KEYWORD_ALIAS, + KEYWORD_ALL, + KEYWORD_AND, + KEYWORD_ARCHITECTURE, + KEYWORD_ARRAY, + KEYWORD_ASSERT, + KEYWORD_ATTRIBUTE, + KEYWORD_BEGIN, + KEYWORD_BLOCK, + KEYWORD_BODY, + KEYWORD_BUFFER, + KEYWORD_BUS, + KEYWORD_CASE, + KEYWORD_COMPONENT, + KEYWORD_CONFIGURATION, + KEYWORD_CONSTANT, + KEYWORD_DISCONNECT, + KEYWORD_DOWNTO, + KEYWORD_ELSE, + KEYWORD_ELSIF, + KEYWORD_END, + KEYWORD_ENTITY, + KEYWORD_EXIT, + KEYWORD_FILE, + KEYWORD_FOR, + KEYWORD_FUNCTION, + KEYWORD_GENERATE, + KEYWORD_GENERIC, + KEYWORD_GROUP, + KEYWORD_GUARDED, + KEYWORD_IF, + KEYWORD_IMPURE, + KEYWORD_IN, + KEYWORD_INERTIAL, + KEYWORD_INOUT, + KEYWORD_IS, + KEYWORD_LABEL, + KEYWORD_LIBRARY, + KEYWORD_LINKAGE, + KEYWORD_LITERAL, + KEYWORD_LOOP, + KEYWORD_MAP, + KEYWORD_MOD, + KEYWORD_NAND, + KEYWORD_NEW, + KEYWORD_NEXT, + KEYWORD_NOR, + KEYWORD_NOT, + KEYWORD_NULL, + KEYWORD_OF, + KEYWORD_ON, + KEYWORD_OPEN, + KEYWORD_OR, + KEYWORD_OTHERS, + KEYWORD_OUT, + KEYWORD_PACKAGE, + KEYWORD_PORT, + KEYWORD_POSTPONED, + KEYWORD_PROCEDURE, + KEYWORD_PROCESS, + KEYWORD_PURE, + KEYWORD_RANGE, + 
KEYWORD_RECORD, + KEYWORD_REGISTER, + KEYWORD_REJECT, + KEYWORD_RETURN, + KEYWORD_ROL, + KEYWORD_ROR, + KEYWORD_SELECT, + KEYWORD_SEVERITY, + KEYWORD_SIGNAL, + KEYWORD_SHARED, + KEYWORD_SLA, + KEYWORD_SLI, + KEYWORD_SRA, + KEYWORD_SRL, + KEYWORD_SUBTYPE, + KEYWORD_THEN, + KEYWORD_TO, + KEYWORD_TRANSPORT, + KEYWORD_TYPE, + KEYWORD_UNAFFECTED, + KEYWORD_UNITS, + KEYWORD_UNTIL, + KEYWORD_USE, + KEYWORD_VARIABLE, + KEYWORD_WAIT, + KEYWORD_WHEN, + KEYWORD_WHILE, + KEYWORD_WITH, + KEYWORD_XNOR, + KEYWORD_XOR +} keywordId; + +/* Used to determine whether keyword is valid for the current language and + * what its ID is. + */ +typedef struct sKeywordDesc { + const char *name; + keywordId id; +} keywordDesc; + +typedef enum eTokenType { + TOKEN_NONE, /* none */ + TOKEN_OPEN_PAREN, /* ( */ + TOKEN_CLOSE_PAREN, /* ) */ + TOKEN_COMMA, /* the comma character */ + TOKEN_IDENTIFIER, + TOKEN_KEYWORD, + TOKEN_PERIOD, /* . */ + TOKEN_OPERATOR, + TOKEN_SEMICOLON, /* the semicolon character */ + TOKEN_STRING +} tokenType; + +typedef struct sTokenInfo { + tokenType type; + keywordId keyword; + vString *string; /* the name of the token */ + vString *scope; + unsigned long lineNumber; /* line number of tag */ + fpos_t filePosition; /* file position of line containing name */ +} tokenInfo; + +/* + * DATA DEFINITIONS + */ +static int Lang_vhdl; +static jmp_buf Exception; + +/* Used to index into the VhdlKinds table. 
*/ +typedef enum { + VHDLTAG_UNDEFINED = -1, + VHDLTAG_CONSTANT, + VHDLTAG_TYPE, + VHDLTAG_SUBTYPE, + VHDLTAG_RECORD, + VHDLTAG_ENTITY, + VHDLTAG_COMPONENT, + VHDLTAG_PROTOTYPE, + VHDLTAG_FUNCTION, + VHDLTAG_PROCEDURE, + VHDLTAG_PACKAGE, + VHDLTAG_LOCAL +} vhdlKind; + +static kindOption VhdlKinds[] = { + {TRUE, 'c', "constant", "constant declarations"}, + {TRUE, 't', "type", "type definitions"}, + {TRUE, 'T', "subtype", "subtype definitions"}, + {TRUE, 'r', "record", "record names"}, + {TRUE, 'e', "entity", "entity declarations"}, + {FALSE, 'C', "component", "component declarations"}, + {FALSE, 'd', "prototype", "prototypes"}, + {TRUE, 'f', "function", "function prototypes and declarations"}, + {TRUE, 'p', "procedure", "procedure prototypes and declarations"}, + {TRUE, 'P', "package", "package definitions"}, + {FALSE, 'l', "local", "local definitions"} +}; + +static keywordDesc VhdlKeywordTable[] = { + {"abs", KEYWORD_ABS}, + {"access", KEYWORD_ACCESS}, + {"after", KEYWORD_AFTER}, + {"alias", KEYWORD_ALIAS}, + {"all", KEYWORD_ALL}, + {"and", KEYWORD_AND}, + {"architecture", KEYWORD_ARCHITECTURE}, + {"array", KEYWORD_ARRAY}, + {"assert", KEYWORD_ASSERT}, + {"attribute", KEYWORD_ATTRIBUTE}, + {"begin", KEYWORD_BEGIN}, + {"block", KEYWORD_BLOCK}, + {"body", KEYWORD_BODY}, + {"buffer", KEYWORD_BUFFER}, + {"bus", KEYWORD_BUS}, + {"case", KEYWORD_CASE}, + {"component", KEYWORD_COMPONENT}, + {"configuration", KEYWORD_CONFIGURATION}, + {"constant", KEYWORD_CONSTANT}, + {"disconnect", KEYWORD_DISCONNECT}, + {"downto", KEYWORD_DOWNTO}, + {"else", KEYWORD_ELSE}, + {"elsif", KEYWORD_ELSIF}, + {"end", KEYWORD_END}, + {"entity", KEYWORD_ENTITY}, + {"exit", KEYWORD_EXIT}, + {"file", KEYWORD_FILE}, + {"for", KEYWORD_FOR}, + {"function", KEYWORD_FUNCTION}, + {"generate", KEYWORD_GENERATE}, + {"generic", KEYWORD_GENERIC}, + {"group", KEYWORD_GROUP}, + {"guarded", KEYWORD_GUARDED}, + {"if", KEYWORD_IF}, + {"impure", KEYWORD_IMPURE}, + {"in", KEYWORD_IN}, + {"inertial", 
KEYWORD_INERTIAL}, + {"inout", KEYWORD_INOUT}, + {"is", KEYWORD_IS}, + {"label", KEYWORD_LABEL}, + {"library", KEYWORD_LIBRARY}, + {"linkage", KEYWORD_LINKAGE}, + {"literal", KEYWORD_LITERAL}, + {"loop", KEYWORD_LOOP}, + {"map", KEYWORD_MAP}, + {"mod", KEYWORD_MOD}, + {"nand", KEYWORD_NAND}, + {"new", KEYWORD_NEW}, + {"next", KEYWORD_NEXT}, + {"nor", KEYWORD_NOR}, + {"not", KEYWORD_NOT}, + {"null", KEYWORD_NULL}, + {"of", KEYWORD_OF}, + {"on", KEYWORD_ON}, + {"open", KEYWORD_OPEN}, + {"or", KEYWORD_OR}, + {"others", KEYWORD_OTHERS}, + {"out", KEYWORD_OUT}, + {"package", KEYWORD_PACKAGE}, + {"port", KEYWORD_PORT}, + {"postponed", KEYWORD_POSTPONED}, + {"procedure", KEYWORD_PROCEDURE}, + {"process", KEYWORD_PROCESS}, + {"pure", KEYWORD_PURE}, + {"range", KEYWORD_RANGE}, + {"record", KEYWORD_RECORD}, + {"register", KEYWORD_REGISTER}, + {"reject", KEYWORD_REJECT}, + {"return", KEYWORD_RETURN}, + {"rol", KEYWORD_ROL}, + {"ror", KEYWORD_ROR}, + {"select", KEYWORD_SELECT}, + {"severity", KEYWORD_SEVERITY}, + {"signal", KEYWORD_SIGNAL}, + {"shared", KEYWORD_SHARED}, + {"sla", KEYWORD_SLA}, + {"sli", KEYWORD_SLI}, + {"sra", KEYWORD_SRA}, + {"srl", KEYWORD_SRL}, + {"subtype", KEYWORD_SUBTYPE}, + {"then", KEYWORD_THEN}, + {"to", KEYWORD_TO}, + {"transport", KEYWORD_TRANSPORT}, + {"type", KEYWORD_TYPE}, + {"unaffected", KEYWORD_UNAFFECTED}, + {"units", KEYWORD_UNITS}, + {"until", KEYWORD_UNTIL}, + {"use", KEYWORD_USE}, + {"variable", KEYWORD_VARIABLE}, + {"wait", KEYWORD_WAIT}, + {"when", KEYWORD_WHEN}, + {"while", KEYWORD_WHILE}, + {"with", KEYWORD_WITH}, + {"xnor", KEYWORD_XNOR}, + {"xor", KEYWORD_XOR} +}; + +/* + * FUNCTION DECLARATIONS + */ +static void parseKeywords (tokenInfo * const token, boolean local); + +/* + * FUNCTION DEFINITIONS + */ + +static boolean isIdentChar1 (const int c) +{ + return (boolean) (isalpha (c) || c == '_'); +} + +static boolean isIdentChar (const int c) +{ + return (boolean) (isalpha (c) || isdigit (c) || c == '_'); +} + +static boolean 
isIdentifierMatch (const tokenInfo * const token, + const vString * const name) +{ + return (boolean) (isType (token, TOKEN_IDENTIFIER) && + strcasecmp (vStringValue (token->string), vStringValue (name)) == 0); + /* XXX this is copy/paste from eiffel.c and slightly modified */ + /* shouldn't we use strNcasecmp ? */ +} + +static boolean isKeywordOrIdent (const tokenInfo * const token, + const keywordId keyword, const vString * const name) +{ + return (boolean) (isKeyword (token, keyword) || + isIdentifierMatch (token, name)); +} + +static tokenInfo *newToken (void) +{ + tokenInfo *const token = xMalloc (1, tokenInfo); + token->type = TOKEN_NONE; + token->keyword = KEYWORD_NONE; + token->string = vStringNew (); + token->scope = vStringNew (); + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + return token; +} + +static void deleteToken (tokenInfo * const token) +{ + if (token != NULL) + { + vStringDelete (token->string); + vStringDelete (token->scope); + eFree (token); + } +} + +/* + * Parsing functions + */ + +static void parseString (vString * const string, const int delimiter) +{ + boolean end = FALSE; + while (!end) + { + int c = fileGetc (); + if (c == EOF) + end = TRUE; + else if (c == '\\') + { + c = fileGetc (); /* This maybe a ' or ". */ + vStringPut (string, c); + } + else if (c == delimiter) + end = TRUE; + else + vStringPut (string, c); + } + vStringTerminate (string); +} + +/* Read a VHDL identifier beginning with "firstChar" and place it into "name". 
+*/ +static void parseIdentifier (vString * const string, const int firstChar) +{ + int c = firstChar; + Assert (isIdentChar1 (c)); + do + { + vStringPut (string, c); + c = fileGetc (); + } while (isIdentChar (c)); + vStringTerminate (string); + if (!isspace (c)) + fileUngetc (c); /* unget non-identifier character */ +} + +static void readToken (tokenInfo * const token) +{ + int c; + + token->type = TOKEN_NONE; + token->keyword = KEYWORD_NONE; + vStringClear (token->string); + + getNextChar: + do + { + c = fileGetc (); + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + } + while (c == '\t' || c == ' ' || c == '\n'); + + switch (c) + { + case EOF: + longjmp (Exception, (int) ExceptionEOF); + break; + case '(': + token->type = TOKEN_OPEN_PAREN; + break; + case ')': + token->type = TOKEN_CLOSE_PAREN; + break; + case ';': + token->type = TOKEN_SEMICOLON; + break; + case '.': + token->type = TOKEN_PERIOD; + break; + case ',': + token->type = TOKEN_COMMA; + break; + case '\'': /* only single char are inside simple quotes */ + break; /* or it is for attributes so we don't care */ + case '"': + token->type = TOKEN_STRING; + parseString (token->string, c); + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + break; + case '-': + c = fileGetc (); + if (c == '-') /* start of a comment */ + { + fileSkipToCharacter ('\n'); + goto getNextChar; + } + else + { + if (!isspace (c)) + fileUngetc (c); + token->type = TOKEN_OPERATOR; + } + break; + default: + if (!isIdentChar1 (c)) + token->type = TOKEN_NONE; + else + { + parseIdentifier (token->string, c); + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + token->keyword = analyzeToken (token->string, Lang_vhdl); + if (isKeyword (token, KEYWORD_NONE)) + token->type = TOKEN_IDENTIFIER; + else + token->type = TOKEN_KEYWORD; + } + break; + } +} + +static void skipToKeyword (const keywordId keyword) +{ + 
/*
 * If "token" is an opening parenthesis, reads tokens up to and including
 * the parenthesis that closes it, then reads one token more so that
 * "token" holds whatever follows the group.  Nested groups such as
 *     ( name varchar(30), text binary(10) )
 * are handled by counting.  Any other token type is left untouched.
 */
static void skipToMatched (tokenInfo * const token)
{
	int depth;

	if (! isType (token, TOKEN_OPEN_PAREN))
		return;

	depth = 1;	/* accounts for the parenthesis we are standing on */
	do
	{
		readToken (token);
		if (isType (token, TOKEN_OPEN_PAREN))
			++depth;
		else if (isType (token, TOKEN_CLOSE_PAREN) && depth > 0)
			--depth;
	}
	while (! (isType (token, TOKEN_CLOSE_PAREN) && depth == 0));

	readToken (token);
}
+ */ + if (vStringLength (token->scope) > 0) + { + vString *fulltag = vStringNew (); + vStringCopy (fulltag, token->scope); + vStringCatS (fulltag, "."); + vStringCatS (fulltag, vStringValue (token->string)); + vStringTerminate (fulltag); + vStringCopy (token->string, fulltag); + vStringDelete (fulltag); + } + makeConstTag (token, kind); + } +} + +static void initialize (const langType language) +{ + size_t i; + const size_t count = + sizeof (VhdlKeywordTable) / sizeof (VhdlKeywordTable[0]); + Lang_vhdl = language; + for (i = 0; i < count; ++i) + { + const keywordDesc *const p = &VhdlKeywordTable[i]; + addKeyword (p->name, language, (int) p->id); + } +} + +static void parsePackage (tokenInfo * const token) +{ + tokenInfo *const name = newToken (); + Assert (isKeyword (token, KEYWORD_PACKAGE)); + readToken (token); + if (isKeyword (token, KEYWORD_BODY)) + { + readToken (name); + makeVhdlTag (name, VHDLTAG_PACKAGE); + } + else if (isType (token, TOKEN_IDENTIFIER)) + { + makeVhdlTag (token, VHDLTAG_PACKAGE); + } + deleteToken (name); +} + +static void parseModule (tokenInfo * const token) +{ + tokenInfo *const name = newToken (); + const vhdlKind kind = isKeyword (token, KEYWORD_ENTITY) ? 
+ VHDLTAG_ENTITY : VHDLTAG_COMPONENT; + Assert (isKeyword (token, KEYWORD_ENTITY) || + isKeyword (token, KEYWORD_COMPONENT)); + readToken (name); + if (kind == VHDLTAG_COMPONENT) + { + makeVhdlTag (name, VHDLTAG_COMPONENT); + skipToKeyword (KEYWORD_END); + fileSkipToCharacter (';'); + } + else + { + readToken (token); + if (isKeyword (token, KEYWORD_IS)) + { + makeVhdlTag (name, VHDLTAG_ENTITY); + skipToKeyword (KEYWORD_END); + fileSkipToCharacter (';'); + } + } + deleteToken (name); +} + +static void parseRecord (tokenInfo * const token) +{ + tokenInfo *const name = newToken (); + Assert (isKeyword (token, KEYWORD_RECORD)); + readToken (name); + do + { + readToken (token); /* should be a colon */ + fileSkipToCharacter (';'); + makeVhdlTag (name, VHDLTAG_RECORD); + readToken (name); + } + while (!isKeyword (name, KEYWORD_END)); + fileSkipToCharacter (';'); + deleteToken (name); +} + +static void parseTypes (tokenInfo * const token) +{ + tokenInfo *const name = newToken (); + const vhdlKind kind = isKeyword (token, KEYWORD_TYPE) ? + VHDLTAG_TYPE : VHDLTAG_SUBTYPE; + Assert (isKeyword (token, KEYWORD_TYPE) || + isKeyword (token, KEYWORD_SUBTYPE)); + readToken (name); + readToken (token); + if (isKeyword (token, KEYWORD_IS)) + { + readToken (token); /* type */ + if (isKeyword (token, KEYWORD_RECORD)) + { + makeVhdlTag (name, kind); + /*TODO: make tags of the record's names */ + parseRecord (token); + } + else + { + makeVhdlTag (name, kind); + } + } + deleteToken (name); +} + +static void parseConstant (boolean local) +{ + tokenInfo *const name = newToken (); + readToken (name); + if (local) + { + makeVhdlTag (name, VHDLTAG_LOCAL); + } + else + { + makeVhdlTag (name, VHDLTAG_CONSTANT); + } + fileSkipToCharacter (';'); + deleteToken (name); +} + +static void parseSubProgram (tokenInfo * const token) +{ + tokenInfo *const name = newToken (); + boolean endSubProgram = FALSE; + const vhdlKind kind = isKeyword (token, KEYWORD_FUNCTION) ? 
+ VHDLTAG_FUNCTION : VHDLTAG_PROCEDURE; + Assert (isKeyword (token, KEYWORD_FUNCTION) || + isKeyword (token, KEYWORD_PROCEDURE)); + readToken (name); /* the name of the function or procedure */ + readToken (token); + if (isType (token, TOKEN_OPEN_PAREN)) + { + skipToMatched (token); + } + + if (kind == VHDLTAG_FUNCTION) + { + if (isKeyword (token, KEYWORD_RETURN)) + { + /* Read datatype */ + readToken (token); + while (! isKeyword (token, KEYWORD_IS) && + ! isType (token, TOKEN_SEMICOLON)) + { + readToken (token); + } + } + } + + if (isType (token, TOKEN_SEMICOLON)) + { + makeVhdlTag (name, VHDLTAG_PROTOTYPE); + } + else if (isKeyword (token, KEYWORD_IS)) + { + if (kind == VHDLTAG_FUNCTION) + { + makeVhdlTag (name, VHDLTAG_FUNCTION); + do + { + readToken (token); + if (isKeyword (token, KEYWORD_END)) + { + readToken (token); + endSubProgram = isKeywordOrIdent (token, + KEYWORD_FUNCTION, name->string); + fileSkipToCharacter (';'); + } + else + { + parseKeywords (token, TRUE); + } + } while (!endSubProgram); + } + else + { + makeVhdlTag (name, VHDLTAG_PROCEDURE); + do + { + readToken (token); + if (isKeyword (token, KEYWORD_END)) + { + readToken (token); + endSubProgram = isKeywordOrIdent (token, + KEYWORD_PROCEDURE, name->string); + fileSkipToCharacter (';'); + } + else + { + parseKeywords (token, TRUE); + } + } while (!endSubProgram); + } + } + deleteToken (name); +} + +/* TODO */ +/* records */ +static void parseKeywords (tokenInfo * const token, boolean local) +{ + switch (token->keyword) + { + case KEYWORD_END: + fileSkipToCharacter (';'); + break; + case KEYWORD_CONSTANT: + parseConstant (local); + break; + case KEYWORD_TYPE: + parseTypes (token); + break; + case KEYWORD_SUBTYPE: + parseTypes (token); + break; + case KEYWORD_ENTITY: + parseModule (token); + break; + case KEYWORD_COMPONENT: + parseModule (token); + break; + case KEYWORD_FUNCTION: + parseSubProgram (token); + break; + case KEYWORD_PROCEDURE: + parseSubProgram (token); + break; + case 
KEYWORD_PACKAGE: + parsePackage (token); + break; + default: + break; + } +} + +static void parseVhdlFile (tokenInfo * const token) +{ + do + { + readToken (token); + parseKeywords (token, FALSE); + } while (!isKeyword (token, KEYWORD_END)); +} + +static void findVhdlTags (void) +{ + tokenInfo *const token = newToken (); + exception_t exception = (exception_t) (setjmp (Exception)); + + while (exception == ExceptionNone) + parseVhdlFile (token); + + deleteToken (token); +} + +extern parserDefinition *VhdlParser (void) +{ + static const char *const extensions[] = { "vhdl", "vhd", NULL }; + parserDefinition *def = parserNew ("VHDL"); + def->kinds = VhdlKinds; + def->kindCount = KIND_COUNT (VhdlKinds); + def->extensions = extensions; + def->parser = findVhdlTags; + def->initialize = initialize; + return def; +} + +/* vi:set tabstop=4 shiftwidth=4 noet: */ diff --git a/third_party/ctags/vim.c b/third_party/ctags/vim.c new file mode 100644 index 000000000..3a9b4d92f --- /dev/null +++ b/third_party/ctags/vim.c @@ -0,0 +1,658 @@ +// clang-format off +/* +* $Id: vim.c 762 2010-07-28 11:38:19Z dfishburn $ +* +* Copyright (c) 2000-2003, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* Thanks are due to Jay Glanville for significant improvements. +* +* This module contains functions for generating tags for user-defined +* functions for the Vim editor. 
/*
 * Tag kinds produced by the Vim parser.  The enumerators are passed to
 * makeSimpleTag () together with VimKinds, so their order must stay in
 * step with the rows of the VimKinds table below.
 */
typedef enum {
	K_AUGROUP,	/* autocommand group */
	K_COMMAND,	/* user-defined command */
	K_FUNCTION,	/* function definition */
	K_MAP,		/* map */
	K_VARIABLE	/* variable definition */
} vimKind;

/*
 * One row per vimKind value.
 * NOTE(review): the columns look like (enabled flag, one-letter kind,
 * kind name, description) -- confirm against the kindOption declaration.
 */
static kindOption VimKinds [] = {
	{ TRUE, 'a', "augroup", "autocommand groups" },
	{ TRUE, 'c', "command", "user-defined commands" },
	{ TRUE, 'f', "function", "function definitions" },
	{ TRUE, 'm', "map", "maps" },
	{ TRUE, 'v', "variable", "variable definitions" },
};
/*
 * Strips a Vim scope prefix from an identifier.
 *
 * name   NUL-terminated identifier text (must not be NULL).
 * scope  optional out-parameter (may be NULL).  It is reset to '\0' and
 *        then set to:
 *          - the prefix letter for an "x:" prefix (e.g. 's' for "s:Foo");
 *          - '<' for a "<SID>" prefix (matched case-insensitively);
 *          - 'd' (dictionary) or 'a' (autoload) when the leading run of
 *            identifier characters contains '.' or '#'; the last such
 *            character seen decides.
 *
 * Returns a pointer to the first character after the "x:" or "<SID>"
 * prefix, or `name` itself when no such prefix is present.
 */
static const unsigned char* skipPrefix (const unsigned char* name, int *scope)
{
	static const char sidPrefix[] = "<SID>";
	const size_t sidLength = sizeof (sidPrefix) - 1;
	const unsigned char* result = name;
	const size_t length = strlen ((const char*) name);

	if (scope != NULL)
		*scope = '\0';

	if (length > 3 && name[1] == ':')
	{
		if (scope != NULL)
			*scope = *name;
		result = name + 2;
	}
	else if (length > sidLength &&
			 strncasecmp ((const char*) name, sidPrefix, sidLength) == 0)
	{
		if (scope != NULL)
			*scope = *name;
		result = name + sidLength;
	}
	else if (scope != NULL && length > 0)
	{
		/*
		 * Vim7 check for dictionaries or autoload function names.
		 * The first character is always inspected; after that the scan
		 * continues only across identifier characters, so it can never
		 * step past the terminating NUL.
		 */
		size_t counter = 0;
		do
		{
			switch ( name[counter] )
			{
				case '.':
					/* Set the scope to d - Dictionary */
					*scope = 'd';
					break;
				case '#':
					/* Set the scope to a - autoload */
					*scope = 'a';
					break;
				default:
					break;
			}
			++counter;
		} while (isalnum ((int) name[counter]) ||
				name[counter] == '_' ||
				name[counter] == '.' ||
				name[counter] == '#'
				);
	}
	return result;
}
+ */ + if ( + strncmp ((const char*) line, "map", (size_t) 3) == 0 || + strncmp ((const char*) line, "nm", (size_t) 2) == 0 || + strncmp ((const char*) line, "nma", (size_t) 3) == 0 || + strncmp ((const char*) line, "nmap", (size_t) 4) == 0 || + strncmp ((const char*) line, "vm", (size_t) 2) == 0 || + strncmp ((const char*) line, "vma", (size_t) 3) == 0 || + strncmp ((const char*) line, "vmap", (size_t) 4) == 0 || + strncmp ((const char*) line, "om", (size_t) 2) == 0 || + strncmp ((const char*) line, "oma", (size_t) 3) == 0 || + strncmp ((const char*) line, "omap", (size_t) 4) == 0 || + strncmp ((const char*) line, "im", (size_t) 2) == 0 || + strncmp ((const char*) line, "ima", (size_t) 3) == 0 || + strncmp ((const char*) line, "imap", (size_t) 4) == 0 || + strncmp ((const char*) line, "lm", (size_t) 2) == 0 || + strncmp ((const char*) line, "lma", (size_t) 3) == 0 || + strncmp ((const char*) line, "lmap", (size_t) 4) == 0 || + strncmp ((const char*) line, "cm", (size_t) 2) == 0 || + strncmp ((const char*) line, "cma", (size_t) 3) == 0 || + strncmp ((const char*) line, "cmap", (size_t) 4) == 0 || + strncmp ((const char*) line, "no", (size_t) 2) == 0 || + strncmp ((const char*) line, "nor", (size_t) 3) == 0 || + strncmp ((const char*) line, "nore", (size_t) 4) == 0 || + strncmp ((const char*) line, "norem", (size_t) 5) == 0 || + strncmp ((const char*) line, "norema", (size_t) 6) == 0 || + strncmp ((const char*) line, "noremap", (size_t) 7) == 0 || + strncmp ((const char*) line, "nno", (size_t) 3) == 0 || + strncmp ((const char*) line, "nnor", (size_t) 4) == 0 || + strncmp ((const char*) line, "nnore", (size_t) 5) == 0 || + strncmp ((const char*) line, "nnorem", (size_t) 6) == 0 || + strncmp ((const char*) line, "nnorema", (size_t) 7) == 0 || + strncmp ((const char*) line, "nnoremap", (size_t) 8) == 0 || + strncmp ((const char*) line, "vno", (size_t) 3) == 0 || + strncmp ((const char*) line, "vnor", (size_t) 4) == 0 || + strncmp ((const char*) line, "vnore", (size_t) 
5) == 0 || + strncmp ((const char*) line, "vnorem", (size_t) 6) == 0 || + strncmp ((const char*) line, "vnorema", (size_t) 7) == 0 || + strncmp ((const char*) line, "vnoremap", (size_t) 8) == 0 || + strncmp ((const char*) line, "ono", (size_t) 3) == 0 || + strncmp ((const char*) line, "onor", (size_t) 4) == 0 || + strncmp ((const char*) line, "onore", (size_t) 5) == 0 || + strncmp ((const char*) line, "onorem", (size_t) 6) == 0 || + strncmp ((const char*) line, "onorema", (size_t) 7) == 0 || + strncmp ((const char*) line, "onoremap", (size_t) 8) == 0 || + strncmp ((const char*) line, "ino", (size_t) 3) == 0 || + strncmp ((const char*) line, "inor", (size_t) 4) == 0 || + strncmp ((const char*) line, "inore", (size_t) 5) == 0 || + strncmp ((const char*) line, "inorem", (size_t) 6) == 0 || + strncmp ((const char*) line, "inorema", (size_t) 7) == 0 || + strncmp ((const char*) line, "inoremap", (size_t) 8) == 0 || + strncmp ((const char*) line, "lno", (size_t) 3) == 0 || + strncmp ((const char*) line, "lnor", (size_t) 4) == 0 || + strncmp ((const char*) line, "lnore", (size_t) 5) == 0 || + strncmp ((const char*) line, "lnorem", (size_t) 6) == 0 || + strncmp ((const char*) line, "lnorema", (size_t) 7) == 0 || + strncmp ((const char*) line, "lnoremap", (size_t) 8) == 0 || + strncmp ((const char*) line, "cno", (size_t) 3) == 0 || + strncmp ((const char*) line, "cnor", (size_t) 4) == 0 || + strncmp ((const char*) line, "cnore", (size_t) 5) == 0 || + strncmp ((const char*) line, "cnorem", (size_t) 6) == 0 || + strncmp ((const char*) line, "cnorema", (size_t) 7) == 0 || + strncmp ((const char*) line, "cnoremap", (size_t) 8) == 0 + ) + return TRUE; + + return FALSE; +} + +static const unsigned char * readVimLine (void) +{ + const unsigned char *line; + + while ((line = fileReadLine ()) != NULL) + { + while (isspace ((int) *line)) + ++line; + + if ((int) *line == '"') + continue; /* skip comment */ + + break; + } + + return line; +} + +static void parseFunction (const unsigned 
char *line) +{ + vString *name = vStringNew (); + /* boolean inFunction = FALSE; */ + int scope; + + const unsigned char *cp = line + 1; + + if ((int) *++cp == 'n' && (int) *++cp == 'c' && + (int) *++cp == 't' && (int) *++cp == 'i' && + (int) *++cp == 'o' && (int) *++cp == 'n') + ++cp; + if ((int) *cp == '!') + ++cp; + if (isspace ((int) *cp)) + { + while (*cp && isspace ((int) *cp)) + ++cp; + + if (*cp) + { + cp = skipPrefix (cp, &scope); + if (isupper ((int) *cp) || + scope == 's' || /* script scope */ + scope == '<' || /* script scope */ + scope == 'd' || /* dictionary */ + scope == 'a') /* autoload */ + { + do + { + vStringPut (name, (int) *cp); + ++cp; + } while (isalnum ((int) *cp) || *cp == '_' || *cp == '.' || *cp == '#'); + vStringTerminate (name); + makeSimpleTag (name, VimKinds, K_FUNCTION); + vStringClear (name); + } + } + } + + /* TODO - update struct to indicate inside function */ + while ((line = readVimLine ()) != NULL) + { + /* + * Vim7 added the for/endfo[r] construct, so we must first + * check for an "endfo", before a "endf" + */ + if ( (!strncmp ((const char*) line, "endfo", (size_t) 5) == 0) && + (strncmp ((const char*) line, "endf", (size_t) 4) == 0) ) + break; + /* TODO - call parseVimLine */ + } + vStringDelete (name); +} + +static void parseAutogroup (const unsigned char *line) +{ + vString *name = vStringNew (); + + /* Found Autocommand Group (augroup) */ + const unsigned char *cp = line + 2; + if ((int) *++cp == 'r' && (int) *++cp == 'o' && + (int) *++cp == 'u' && (int) *++cp == 'p') + ++cp; + if (isspace ((int) *cp)) + { + while (*cp && isspace ((int) *cp)) + ++cp; + + if (*cp) + { + if (strncasecmp ((const char*) cp, "end", (size_t) 3) != 0) + { + do + { + vStringPut (name, (int) *cp); + ++cp; + } while (isalnum ((int) *cp) || *cp == '_'); + vStringTerminate (name); + makeSimpleTag (name, VimKinds, K_AUGROUP); + vStringClear (name); + } + } + } + vStringDelete (name); +} + +static boolean parseCommand (const unsigned char *line) +{ + 
vString *name = vStringNew (); + boolean cmdProcessed = TRUE; + + /* + * Found a user-defined command + * + * They can have many options preceeded by a dash + * command! -nargs=+ -complete Select :call s:DB_execSql("select " . ) + * The name of the command should be the first word not preceeded by a dash + * + */ + const unsigned char *cp = line; + + if ( (int) *cp == '\\' ) + { + /* + * We are recursively calling this function is the command + * has been continued on to the next line + * + * Vim statements can be continued onto a newline using a \ + * to indicate the previous line is continuing. + * + * com -nargs=1 -bang -complete=customlist,EditFileComplete + * \ EditFile edit + * + * If the following lines do not have a line continuation + * the command must not be spanning multiple lines and should + * be synatically incorrect. + */ + if ((int) *cp == '\\') + ++cp; + + while (*cp && isspace ((int) *cp)) + ++cp; + } + else if ( (!strncmp ((const char*) line, "comp", (size_t) 4) == 0) && + (!strncmp ((const char*) line, "comc", (size_t) 4) == 0) && + (strncmp ((const char*) line, "com", (size_t) 3) == 0) ) + { + cp += 2; + if ((int) *++cp == 'm' && (int) *++cp == 'a' && + (int) *++cp == 'n' && (int) *++cp == 'd') + ++cp; + + if ((int) *cp == '!') + ++cp; + + if ((int) *cp != ' ') + { + /* + * :command must be followed by a space. If it is not, it is + * not a valid command. + * Treat the line as processed and continue. + */ + cmdProcessed = TRUE; + goto cleanUp; + } + + while (*cp && isspace ((int) *cp)) + ++cp; + } + else + { + /* + * We are recursively calling this function. If it does not start + * with "com" or a line continuation character, we have moved off + * the command line and should let the other routines parse this file. + */ + cmdProcessed = FALSE; + goto cleanUp; + } + + /* + * Strip off any spaces and options which are part of the command. + * These should preceed the command name. 
+ */ + do + { + if (isspace ((int) *cp)) + { + ++cp; + } + else if (*cp == '-') + { + /* + * Read until the next space which separates options or the name + */ + while (*cp && !isspace ((int) *cp)) + ++cp; + } + else + break; + } while ( *cp ); + + if ( ! *cp ) + { + /* + * We have reached the end of the line without finding the command name. + * Read the next line and continue processing it as a command. + */ + line = readVimLine(); + parseCommand(line); + goto cleanUp; + } + + do + { + vStringPut (name, (int) *cp); + ++cp; + } while (isalnum ((int) *cp) || *cp == '_'); + + vStringTerminate (name); + makeSimpleTag (name, VimKinds, K_COMMAND); + vStringClear (name); + +cleanUp: + vStringDelete (name); + + return cmdProcessed; +} + +static void parseLet (const unsigned char *line) +{ + vString *name = vStringNew (); + + /* we've found a variable declared outside of a function!! */ + const unsigned char *cp = line + 3; + const unsigned char *np = line; + /* get the name */ + if (isspace ((int) *cp)) + { + while (*cp && isspace ((int) *cp)) + ++cp; + + /* + * Ignore lets which set: + * & - local buffer vim settings + * @ - registers + * [ - Lists or Dictionaries + */ + if (!*cp || *cp == '&' || *cp == '@' || *cp == '[' ) + goto cleanUp; + + /* + * Ignore vim variables which are read only + * v: - Vim variables. 
+ */ + np = cp; + ++np; + if ((int) *cp == 'v' && (int) *np == ':' ) + goto cleanUp; + + /* deal with spaces, $, @ and & */ + while (*cp && *cp != '$' && !isalnum ((int) *cp)) + ++cp; + + if (!*cp) + goto cleanUp; + + /* cp = skipPrefix (cp, &scope); */ + do + { + if (!*cp) + break; + + vStringPut (name, (int) *cp); + ++cp; + } while (isalnum ((int) *cp) || *cp == '_' || *cp == '#' || *cp == ':' || *cp == '$'); + vStringTerminate (name); + makeSimpleTag (name, VimKinds, K_VARIABLE); + vStringClear (name); + } + +cleanUp: + vStringDelete (name); +} + +static boolean parseMap (const unsigned char *line) +{ + vString *name = vStringNew (); + + const unsigned char *cp = line; + + /* Remove map */ + while (*cp && isalnum ((int) *cp)) + ++cp; + + if ((int) *cp == '!') + ++cp; + + /* + * Maps follow this basic format + * map + * nnoremap :Tlist + * map scdt GetColumnDataType + * inoremap ,,, diwi<pa>pa>kA + * inoremap ( =PreviewFunctionSignature() + * + * The Vim help shows the various special arguments available to a map: + * 1.2 SPECIAL ARGUMENTS *:map-arguments* + * + * + *