mirror of
https://github.com/python/cpython.git
synced 2025-08-04 00:48:58 +00:00
Initial revision
This commit is contained in:
parent
c636014c43
commit
85a5fbbdfe
78 changed files with 13589 additions and 0 deletions
101
Parser/acceler.c
Normal file
101
Parser/acceler.c
Normal file
|
@ -0,0 +1,101 @@
|
|||
/* Parser accelerator module */
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include "PROTO.h"
|
||||
#include "grammar.h"
|
||||
#include "token.h"
|
||||
#include "malloc.h"
|
||||
|
||||
static void
|
||||
fixstate(g, d, s)
|
||||
grammar *g;
|
||||
dfa *d;
|
||||
state *s;
|
||||
{
|
||||
arc *a;
|
||||
int k;
|
||||
int *accel;
|
||||
int nl = g->g_ll.ll_nlabels;
|
||||
s->s_accept = 0;
|
||||
accel = NEW(int, nl);
|
||||
for (k = 0; k < nl; k++)
|
||||
accel[k] = -1;
|
||||
a = s->s_arc;
|
||||
for (k = s->s_narcs; --k >= 0; a++) {
|
||||
int lbl = a->a_lbl;
|
||||
label *l = &g->g_ll.ll_label[lbl];
|
||||
int type = l->lb_type;
|
||||
if (a->a_arrow >= (1 << 7)) {
|
||||
printf("XXX too many states!\n");
|
||||
continue;
|
||||
}
|
||||
if (ISNONTERMINAL(type)) {
|
||||
dfa *d1 = finddfa(g, type);
|
||||
int ibit;
|
||||
if (type - NT_OFFSET >= (1 << 7)) {
|
||||
printf("XXX too high nonterminal number!\n");
|
||||
continue;
|
||||
}
|
||||
for (ibit = 0; ibit < g->g_ll.ll_nlabels; ibit++) {
|
||||
if (testbit(d1->d_first, ibit)) {
|
||||
if (accel[ibit] != -1)
|
||||
printf("XXX ambiguity!\n");
|
||||
accel[ibit] = a->a_arrow | (1 << 7) |
|
||||
((type - NT_OFFSET) << 8);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (lbl == EMPTY)
|
||||
s->s_accept = 1;
|
||||
else if (lbl >= 0 && lbl < nl)
|
||||
accel[lbl] = a->a_arrow;
|
||||
}
|
||||
while (nl > 0 && accel[nl-1] == -1)
|
||||
nl--;
|
||||
for (k = 0; k < nl && accel[k] == -1;)
|
||||
k++;
|
||||
if (k < nl) {
|
||||
int i;
|
||||
s->s_accel = NEW(int, nl-k);
|
||||
if (s->s_accel == NULL) {
|
||||
fprintf(stderr, "no mem to add parser accelerators\n");
|
||||
exit(1);
|
||||
}
|
||||
s->s_lower = k;
|
||||
s->s_upper = nl;
|
||||
for (i = 0; k < nl; i++, k++)
|
||||
s->s_accel[i] = accel[k];
|
||||
}
|
||||
DEL(accel);
|
||||
}
|
||||
|
||||
static void
|
||||
fixdfa(g, d)
|
||||
grammar *g;
|
||||
dfa *d;
|
||||
{
|
||||
state *s;
|
||||
int j;
|
||||
s = d->d_state;
|
||||
for (j = 0; j < d->d_nstates; j++, s++)
|
||||
fixstate(g, d, s);
|
||||
}
|
||||
|
||||
void
|
||||
addaccelerators(g)
|
||||
grammar *g;
|
||||
{
|
||||
dfa *d;
|
||||
int i;
|
||||
#ifdef DEBUG
|
||||
printf("Adding parser accellerators ...\n");
|
||||
#endif
|
||||
d = g->g_dfa;
|
||||
for (i = g->g_ndfas; --i >= 0; d++)
|
||||
fixdfa(g, d);
|
||||
g->g_accel = 1;
|
||||
#ifdef DEBUG
|
||||
printf("Done.\n");
|
||||
#endif
|
||||
}
|
1
Parser/assert.h
Normal file
1
Parser/assert.h
Normal file
|
@ -0,0 +1 @@
|
|||
#define assert(e) { if (!(e)) { printf("Assertion failed\n"); abort(); } }
|
76
Parser/bitset.c
Normal file
76
Parser/bitset.c
Normal file
|
@ -0,0 +1,76 @@
|
|||
/* Bitset primitives */
|
||||
|
||||
#include "PROTO.h"
|
||||
#include "malloc.h"
|
||||
#include "bitset.h"
|
||||
|
||||
bitset
|
||||
newbitset(nbits)
|
||||
int nbits;
|
||||
{
|
||||
int nbytes = NBYTES(nbits);
|
||||
bitset ss = NEW(BYTE, nbytes);
|
||||
|
||||
if (ss == NULL)
|
||||
fatal("no mem for bitset");
|
||||
|
||||
ss += nbytes;
|
||||
while (--nbytes >= 0)
|
||||
*--ss = 0;
|
||||
return ss;
|
||||
}
|
||||
|
||||
void
|
||||
delbitset(ss)
|
||||
bitset ss;
|
||||
{
|
||||
DEL(ss);
|
||||
}
|
||||
|
||||
int
|
||||
addbit(ss, ibit)
|
||||
bitset ss;
|
||||
int ibit;
|
||||
{
|
||||
int ibyte = BIT2BYTE(ibit);
|
||||
BYTE mask = BIT2MASK(ibit);
|
||||
|
||||
if (ss[ibyte] & mask)
|
||||
return 0; /* Bit already set */
|
||||
ss[ibyte] |= mask;
|
||||
return 1;
|
||||
}
|
||||
|
||||
#if 0 /* Now a macro */
|
||||
int
|
||||
testbit(ss, ibit)
|
||||
bitset ss;
|
||||
int ibit;
|
||||
{
|
||||
return (ss[BIT2BYTE(ibit)] & BIT2MASK(ibit)) != 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
int
|
||||
samebitset(ss1, ss2, nbits)
|
||||
bitset ss1, ss2;
|
||||
int nbits;
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = NBYTES(nbits); --i >= 0; )
|
||||
if (*ss1++ != *ss2++)
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
void
|
||||
mergebitset(ss1, ss2, nbits)
|
||||
bitset ss1, ss2;
|
||||
int nbits;
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = NBYTES(nbits); --i >= 0; )
|
||||
*ss1++ |= *ss2++;
|
||||
}
|
109
Parser/firstsets.c
Normal file
109
Parser/firstsets.c
Normal file
|
@ -0,0 +1,109 @@
|
|||
/* Computation of FIRST stets */
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include "PROTO.h"
|
||||
#include "malloc.h"
|
||||
#include "grammar.h"
|
||||
#include "token.h"
|
||||
|
||||
extern int debugging;
|
||||
|
||||
static void
|
||||
calcfirstset(g, d)
|
||||
grammar *g;
|
||||
dfa *d;
|
||||
{
|
||||
int i, j;
|
||||
state *s;
|
||||
arc *a;
|
||||
int nsyms;
|
||||
int *sym;
|
||||
int nbits;
|
||||
static bitset dummy;
|
||||
bitset result;
|
||||
int type;
|
||||
dfa *d1;
|
||||
label *l0;
|
||||
|
||||
if (debugging)
|
||||
printf("Calculate FIRST set for '%s'\n", d->d_name);
|
||||
|
||||
if (dummy == NULL)
|
||||
dummy = newbitset(1);
|
||||
if (d->d_first == dummy) {
|
||||
fprintf(stderr, "Left-recursion for '%s'\n", d->d_name);
|
||||
return;
|
||||
}
|
||||
if (d->d_first != NULL) {
|
||||
fprintf(stderr, "Re-calculating FIRST set for '%s' ???\n",
|
||||
d->d_name);
|
||||
}
|
||||
d->d_first = dummy;
|
||||
|
||||
l0 = g->g_ll.ll_label;
|
||||
nbits = g->g_ll.ll_nlabels;
|
||||
result = newbitset(nbits);
|
||||
|
||||
sym = NEW(int, 1);
|
||||
if (sym == NULL)
|
||||
fatal("no mem for new sym in calcfirstset");
|
||||
nsyms = 1;
|
||||
sym[0] = findlabel(&g->g_ll, d->d_type, (char *)NULL);
|
||||
|
||||
s = &d->d_state[d->d_initial];
|
||||
for (i = 0; i < s->s_narcs; i++) {
|
||||
a = &s->s_arc[i];
|
||||
for (j = 0; j < nsyms; j++) {
|
||||
if (sym[j] == a->a_lbl)
|
||||
break;
|
||||
}
|
||||
if (j >= nsyms) { /* New label */
|
||||
RESIZE(sym, int, nsyms + 1);
|
||||
if (sym == NULL)
|
||||
fatal("no mem to resize sym in calcfirstset");
|
||||
sym[nsyms++] = a->a_lbl;
|
||||
type = l0[a->a_lbl].lb_type;
|
||||
if (ISNONTERMINAL(type)) {
|
||||
d1 = finddfa(g, type);
|
||||
if (d1->d_first == dummy) {
|
||||
fprintf(stderr,
|
||||
"Left-recursion below '%s'\n",
|
||||
d->d_name);
|
||||
}
|
||||
else {
|
||||
if (d1->d_first == NULL)
|
||||
calcfirstset(g, d1);
|
||||
mergebitset(result, d1->d_first, nbits);
|
||||
}
|
||||
}
|
||||
else if (ISTERMINAL(type)) {
|
||||
addbit(result, a->a_lbl);
|
||||
}
|
||||
}
|
||||
}
|
||||
d->d_first = result;
|
||||
if (debugging) {
|
||||
printf("FIRST set for '%s': {", d->d_name);
|
||||
for (i = 0; i < nbits; i++) {
|
||||
if (testbit(result, i))
|
||||
printf(" %s", labelrepr(&l0[i]));
|
||||
}
|
||||
printf(" }\n");
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
addfirstsets(g)
|
||||
grammar *g;
|
||||
{
|
||||
int i;
|
||||
dfa *d;
|
||||
|
||||
printf("Adding FIRST sets ...\n");
|
||||
for (i = 0; i < g->g_ndfas; i++) {
|
||||
d = &g->g_dfa[i];
|
||||
if (d->d_first == NULL)
|
||||
calcfirstset(g, d);
|
||||
}
|
||||
}
|
207
Parser/grammar.c
Normal file
207
Parser/grammar.c
Normal file
|
@ -0,0 +1,207 @@
|
|||
/* Grammar implementation */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#include "PROTO.h"
|
||||
#include "malloc.h"
|
||||
#include "assert.h"
|
||||
#include "token.h"
|
||||
#include "grammar.h"
|
||||
|
||||
extern int debugging;
|
||||
|
||||
grammar *
|
||||
newgrammar(start)
|
||||
int start;
|
||||
{
|
||||
grammar *g;
|
||||
|
||||
g = NEW(grammar, 1);
|
||||
if (g == NULL)
|
||||
fatal("no mem for new grammar");
|
||||
g->g_ndfas = 0;
|
||||
g->g_dfa = NULL;
|
||||
g->g_start = start;
|
||||
g->g_ll.ll_nlabels = 0;
|
||||
g->g_ll.ll_label = NULL;
|
||||
return g;
|
||||
}
|
||||
|
||||
dfa *
|
||||
adddfa(g, type, name)
|
||||
grammar *g;
|
||||
int type;
|
||||
char *name;
|
||||
{
|
||||
dfa *d;
|
||||
|
||||
RESIZE(g->g_dfa, dfa, g->g_ndfas + 1);
|
||||
if (g->g_dfa == NULL)
|
||||
fatal("no mem to resize dfa in adddfa");
|
||||
d = &g->g_dfa[g->g_ndfas++];
|
||||
d->d_type = type;
|
||||
d->d_name = name;
|
||||
d->d_nstates = 0;
|
||||
d->d_state = NULL;
|
||||
d->d_initial = -1;
|
||||
d->d_first = NULL;
|
||||
return d; /* Only use while fresh! */
|
||||
}
|
||||
|
||||
int
|
||||
addstate(d)
|
||||
dfa *d;
|
||||
{
|
||||
state *s;
|
||||
|
||||
RESIZE(d->d_state, state, d->d_nstates + 1);
|
||||
if (d->d_state == NULL)
|
||||
fatal("no mem to resize state in addstate");
|
||||
s = &d->d_state[d->d_nstates++];
|
||||
s->s_narcs = 0;
|
||||
s->s_arc = NULL;
|
||||
return s - d->d_state;
|
||||
}
|
||||
|
||||
void
|
||||
addarc(d, from, to, lbl)
|
||||
dfa *d;
|
||||
int lbl;
|
||||
{
|
||||
state *s;
|
||||
arc *a;
|
||||
|
||||
assert(0 <= from && from < d->d_nstates);
|
||||
assert(0 <= to && to < d->d_nstates);
|
||||
|
||||
s = &d->d_state[from];
|
||||
RESIZE(s->s_arc, arc, s->s_narcs + 1);
|
||||
if (s->s_arc == NULL)
|
||||
fatal("no mem to resize arc list in addarc");
|
||||
a = &s->s_arc[s->s_narcs++];
|
||||
a->a_lbl = lbl;
|
||||
a->a_arrow = to;
|
||||
}
|
||||
|
||||
int
|
||||
addlabel(ll, type, str)
|
||||
labellist *ll;
|
||||
int type;
|
||||
char *str;
|
||||
{
|
||||
int i;
|
||||
label *lb;
|
||||
|
||||
for (i = 0; i < ll->ll_nlabels; i++) {
|
||||
if (ll->ll_label[i].lb_type == type &&
|
||||
strcmp(ll->ll_label[i].lb_str, str) == 0)
|
||||
return i;
|
||||
}
|
||||
RESIZE(ll->ll_label, label, ll->ll_nlabels + 1);
|
||||
if (ll->ll_label == NULL)
|
||||
fatal("no mem to resize labellist in addlabel");
|
||||
lb = &ll->ll_label[ll->ll_nlabels++];
|
||||
lb->lb_type = type;
|
||||
lb->lb_str = str; /* XXX strdup(str) ??? */
|
||||
return lb - ll->ll_label;
|
||||
}
|
||||
|
||||
/* Same, but rather dies than adds */
|
||||
|
||||
int
|
||||
findlabel(ll, type, str)
|
||||
labellist *ll;
|
||||
int type;
|
||||
char *str;
|
||||
{
|
||||
int i;
|
||||
label *lb;
|
||||
|
||||
for (i = 0; i < ll->ll_nlabels; i++) {
|
||||
if (ll->ll_label[i].lb_type == type /*&&
|
||||
strcmp(ll->ll_label[i].lb_str, str) == 0*/)
|
||||
return i;
|
||||
}
|
||||
fprintf(stderr, "Label %d/'%s' not found\n", type, str);
|
||||
abort();
|
||||
}
|
||||
|
||||
static void
|
||||
translabel(g, lb)
|
||||
grammar *g;
|
||||
label *lb;
|
||||
{
|
||||
int i;
|
||||
|
||||
if (debugging)
|
||||
printf("Translating label %s ...\n", labelrepr(lb));
|
||||
|
||||
if (lb->lb_type == NAME) {
|
||||
for (i = 0; i < g->g_ndfas; i++) {
|
||||
if (strcmp(lb->lb_str, g->g_dfa[i].d_name) == 0) {
|
||||
if (debugging)
|
||||
printf("Label %s is non-terminal %d.\n",
|
||||
lb->lb_str,
|
||||
g->g_dfa[i].d_type);
|
||||
lb->lb_type = g->g_dfa[i].d_type;
|
||||
lb->lb_str = NULL;
|
||||
return;
|
||||
}
|
||||
}
|
||||
for (i = 0; i < (int)N_TOKENS; i++) {
|
||||
if (strcmp(lb->lb_str, tok_name[i]) == 0) {
|
||||
if (debugging)
|
||||
printf("Label %s is terminal %d.\n",
|
||||
lb->lb_str, i);
|
||||
lb->lb_type = i;
|
||||
lb->lb_str = NULL;
|
||||
return;
|
||||
}
|
||||
}
|
||||
printf("Can't translate NAME label '%s'\n", lb->lb_str);
|
||||
return;
|
||||
}
|
||||
|
||||
if (lb->lb_type == STRING) {
|
||||
if (isalpha(lb->lb_str[1])) {
|
||||
char *p, *strchr();
|
||||
if (debugging)
|
||||
printf("Label %s is a keyword\n", lb->lb_str);
|
||||
lb->lb_type = NAME;
|
||||
lb->lb_str++;
|
||||
p = strchr(lb->lb_str, '\'');
|
||||
if (p)
|
||||
*p = '\0';
|
||||
}
|
||||
else {
|
||||
if (lb->lb_str[2] == lb->lb_str[0]) {
|
||||
int type = (int) tok_1char(lb->lb_str[1]);
|
||||
if (type != OP) {
|
||||
lb->lb_type = type;
|
||||
lb->lb_str = NULL;
|
||||
}
|
||||
else
|
||||
printf("Unknown OP label %s\n",
|
||||
lb->lb_str);
|
||||
}
|
||||
else
|
||||
printf("Can't translate STRING label %s\n",
|
||||
lb->lb_str);
|
||||
}
|
||||
}
|
||||
else
|
||||
printf("Can't translate label '%s'\n", labelrepr(lb));
|
||||
}
|
||||
|
||||
void
|
||||
translatelabels(g)
|
||||
grammar *g;
|
||||
{
|
||||
int i;
|
||||
|
||||
printf("Translating labels ...\n");
|
||||
/* Don't translate EMPTY */
|
||||
for (i = EMPTY+1; i < g->g_ll.ll_nlabels; i++)
|
||||
translabel(g, &g->g_ll.ll_label[i]);
|
||||
}
|
52
Parser/grammar1.c
Normal file
52
Parser/grammar1.c
Normal file
|
@ -0,0 +1,52 @@
|
|||
/* Grammar subroutines needed by parser */
|
||||
|
||||
#include "PROTO.h"
|
||||
#define NULL 0
|
||||
#include "assert.h"
|
||||
#include "grammar.h"
|
||||
#include "token.h"
|
||||
|
||||
/* Return the DFA for the given type */
|
||||
|
||||
dfa *
|
||||
finddfa(g, type)
|
||||
grammar *g;
|
||||
register int type;
|
||||
{
|
||||
register int i;
|
||||
register dfa *d;
|
||||
|
||||
for (i = g->g_ndfas, d = g->g_dfa; --i >= 0; d++) {
|
||||
if (d->d_type == type)
|
||||
return d;
|
||||
}
|
||||
assert(0);
|
||||
/* NOTREACHED */
|
||||
}
|
||||
|
||||
char *
|
||||
labelrepr(lb)
|
||||
label *lb;
|
||||
{
|
||||
static char buf[100];
|
||||
|
||||
if (lb->lb_type == ENDMARKER)
|
||||
return "EMPTY";
|
||||
else if (ISNONTERMINAL(lb->lb_type)) {
|
||||
if (lb->lb_str == NULL) {
|
||||
sprintf(buf, "NT%d", lb->lb_type);
|
||||
return buf;
|
||||
}
|
||||
else
|
||||
return lb->lb_str;
|
||||
}
|
||||
else {
|
||||
if (lb->lb_str == NULL)
|
||||
return tok_name[lb->lb_type];
|
||||
else {
|
||||
sprintf(buf, "%.32s(%.32s)",
|
||||
tok_name[lb->lb_type], lb->lb_str);
|
||||
return buf;
|
||||
}
|
||||
}
|
||||
}
|
95
Parser/intrcheck.c
Normal file
95
Parser/intrcheck.c
Normal file
|
@ -0,0 +1,95 @@
|
|||
/* Check for interrupts */
|
||||
|
||||
#ifdef MSDOS
|
||||
|
||||
/* This might work for MS-DOS: */
|
||||
|
||||
void
|
||||
initintr()
|
||||
{
|
||||
}
|
||||
|
||||
int
|
||||
intrcheck()
|
||||
{
|
||||
int interrupted = 0;
|
||||
while (kbhit()) {
|
||||
if (getch() == '\003')
|
||||
interrupted = 1;
|
||||
}
|
||||
return interrupted;
|
||||
}
|
||||
|
||||
#define OK
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef THINK_C
|
||||
|
||||
#include <MacHeaders>
|
||||
|
||||
void
|
||||
initintr()
|
||||
{
|
||||
}
|
||||
|
||||
int
|
||||
intrcheck()
|
||||
{
|
||||
/* Static to make it faster(?) only */
|
||||
static EventRecord e;
|
||||
|
||||
/* XXX This fails if the user first types ahead and then
|
||||
decides to interrupt -- repeating Command-. until the
|
||||
event queue overflows may work though. */
|
||||
if (EventAvail(keyDownMask|autoKeyMask, &e) &&
|
||||
(e.modifiers & cmdKey) &&
|
||||
(e.message & charCodeMask) == '.') {
|
||||
(void) GetNextEvent(keyDownMask|autoKeyMask, &e);
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define OK
|
||||
|
||||
#endif /* THINK_C */
|
||||
|
||||
|
||||
#ifndef OK
|
||||
|
||||
/* Default version -- should work for Unix and Standard C */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <signal.h>
|
||||
|
||||
#include "sigtype.h"
|
||||
|
||||
static int interrupted;
|
||||
|
||||
static SIGTYPE
|
||||
intcatcher(sig)
|
||||
int sig;
|
||||
{
|
||||
interrupted = 1;
|
||||
signal(SIGINT, intcatcher);
|
||||
}
|
||||
|
||||
void
|
||||
initintr()
|
||||
{
|
||||
if (signal(SIGINT, SIG_IGN) != SIG_IGN)
|
||||
signal(SIGINT, intcatcher);
|
||||
}
|
||||
|
||||
int
|
||||
intrcheck()
|
||||
{
|
||||
if (!interrupted)
|
||||
return 0;
|
||||
interrupted = 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
#endif /* !OK */
|
68
Parser/listnode.c
Normal file
68
Parser/listnode.c
Normal file
|
@ -0,0 +1,68 @@
|
|||
/* List a node on a file */
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include "PROTO.h"
|
||||
#include "token.h"
|
||||
#include "node.h"
|
||||
|
||||
static int level, atbol;
|
||||
|
||||
static void
|
||||
list1node(fp, n)
|
||||
FILE *fp;
|
||||
node *n;
|
||||
{
|
||||
if (n == 0)
|
||||
return;
|
||||
if (ISNONTERMINAL(TYPE(n))) {
|
||||
int i;
|
||||
for (i = 0; i < NCH(n); i++)
|
||||
list1node(fp, CHILD(n, i));
|
||||
}
|
||||
else if (ISTERMINAL(TYPE(n))) {
|
||||
switch (TYPE(n)) {
|
||||
case INDENT:
|
||||
++level;
|
||||
break;
|
||||
case DEDENT:
|
||||
--level;
|
||||
break;
|
||||
default:
|
||||
if (atbol) {
|
||||
int i;
|
||||
for (i = 0; i < level; ++i)
|
||||
fprintf(fp, "\t");
|
||||
atbol = 0;
|
||||
}
|
||||
if (TYPE(n) == NEWLINE) {
|
||||
if (STR(n) != NULL)
|
||||
fprintf(fp, "%s", STR(n));
|
||||
fprintf(fp, "\n");
|
||||
atbol = 1;
|
||||
}
|
||||
else
|
||||
fprintf(fp, "%s ", STR(n));
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
fprintf(fp, "? ");
|
||||
}
|
||||
|
||||
void
|
||||
listnode(fp, n)
|
||||
FILE *fp;
|
||||
node *n;
|
||||
{
|
||||
level = 0;
|
||||
atbol = 1;
|
||||
list1node(fp, n);
|
||||
}
|
||||
|
||||
void
|
||||
listtree(n)
|
||||
node *n;
|
||||
{
|
||||
listnode(stdout, n);
|
||||
}
|
151
Parser/metagrammar.c
Normal file
151
Parser/metagrammar.c
Normal file
|
@ -0,0 +1,151 @@
|
|||
#include "PROTO.h"
|
||||
#include "metagrammar.h"
|
||||
#include "grammar.h"
|
||||
static arc arcs_0_0[3] = {
|
||||
{2, 0},
|
||||
{3, 0},
|
||||
{4, 1},
|
||||
};
|
||||
static arc arcs_0_1[1] = {
|
||||
{0, 1},
|
||||
};
|
||||
static state states_0[2] = {
|
||||
{3, arcs_0_0},
|
||||
{1, arcs_0_1},
|
||||
};
|
||||
static arc arcs_1_0[1] = {
|
||||
{5, 1},
|
||||
};
|
||||
static arc arcs_1_1[1] = {
|
||||
{6, 2},
|
||||
};
|
||||
static arc arcs_1_2[1] = {
|
||||
{7, 3},
|
||||
};
|
||||
static arc arcs_1_3[1] = {
|
||||
{3, 4},
|
||||
};
|
||||
static arc arcs_1_4[1] = {
|
||||
{0, 4},
|
||||
};
|
||||
static state states_1[5] = {
|
||||
{1, arcs_1_0},
|
||||
{1, arcs_1_1},
|
||||
{1, arcs_1_2},
|
||||
{1, arcs_1_3},
|
||||
{1, arcs_1_4},
|
||||
};
|
||||
static arc arcs_2_0[1] = {
|
||||
{8, 1},
|
||||
};
|
||||
static arc arcs_2_1[2] = {
|
||||
{9, 0},
|
||||
{0, 1},
|
||||
};
|
||||
static state states_2[2] = {
|
||||
{1, arcs_2_0},
|
||||
{2, arcs_2_1},
|
||||
};
|
||||
static arc arcs_3_0[1] = {
|
||||
{10, 1},
|
||||
};
|
||||
static arc arcs_3_1[2] = {
|
||||
{10, 1},
|
||||
{0, 1},
|
||||
};
|
||||
static state states_3[2] = {
|
||||
{1, arcs_3_0},
|
||||
{2, arcs_3_1},
|
||||
};
|
||||
static arc arcs_4_0[2] = {
|
||||
{11, 1},
|
||||
{13, 2},
|
||||
};
|
||||
static arc arcs_4_1[1] = {
|
||||
{7, 3},
|
||||
};
|
||||
static arc arcs_4_2[3] = {
|
||||
{14, 4},
|
||||
{15, 4},
|
||||
{0, 2},
|
||||
};
|
||||
static arc arcs_4_3[1] = {
|
||||
{12, 4},
|
||||
};
|
||||
static arc arcs_4_4[1] = {
|
||||
{0, 4},
|
||||
};
|
||||
static state states_4[5] = {
|
||||
{2, arcs_4_0},
|
||||
{1, arcs_4_1},
|
||||
{3, arcs_4_2},
|
||||
{1, arcs_4_3},
|
||||
{1, arcs_4_4},
|
||||
};
|
||||
static arc arcs_5_0[3] = {
|
||||
{5, 1},
|
||||
{16, 1},
|
||||
{17, 2},
|
||||
};
|
||||
static arc arcs_5_1[1] = {
|
||||
{0, 1},
|
||||
};
|
||||
static arc arcs_5_2[1] = {
|
||||
{7, 3},
|
||||
};
|
||||
static arc arcs_5_3[1] = {
|
||||
{18, 1},
|
||||
};
|
||||
static state states_5[4] = {
|
||||
{3, arcs_5_0},
|
||||
{1, arcs_5_1},
|
||||
{1, arcs_5_2},
|
||||
{1, arcs_5_3},
|
||||
};
|
||||
static dfa dfas[6] = {
|
||||
{256, "MSTART", 0, 2, states_0,
|
||||
"\070\000\000"},
|
||||
{257, "RULE", 0, 5, states_1,
|
||||
"\040\000\000"},
|
||||
{258, "RHS", 0, 2, states_2,
|
||||
"\040\010\003"},
|
||||
{259, "ALT", 0, 2, states_3,
|
||||
"\040\010\003"},
|
||||
{260, "ITEM", 0, 5, states_4,
|
||||
"\040\010\003"},
|
||||
{261, "ATOM", 0, 4, states_5,
|
||||
"\040\000\003"},
|
||||
};
|
||||
static label labels[19] = {
|
||||
{0, "EMPTY"},
|
||||
{256, 0},
|
||||
{257, 0},
|
||||
{4, 0},
|
||||
{0, 0},
|
||||
{1, 0},
|
||||
{11, 0},
|
||||
{258, 0},
|
||||
{259, 0},
|
||||
{18, 0},
|
||||
{260, 0},
|
||||
{9, 0},
|
||||
{10, 0},
|
||||
{261, 0},
|
||||
{16, 0},
|
||||
{14, 0},
|
||||
{3, 0},
|
||||
{7, 0},
|
||||
{8, 0},
|
||||
};
|
||||
static grammar gram = {
|
||||
6,
|
||||
dfas,
|
||||
{19, labels},
|
||||
256
|
||||
};
|
||||
|
||||
grammar *
|
||||
meta_grammar()
|
||||
{
|
||||
return &gram;
|
||||
}
|
47
Parser/node.c
Normal file
47
Parser/node.c
Normal file
|
@ -0,0 +1,47 @@
|
|||
/* Parse tree node implementation */
|
||||
|
||||
#include "PROTO.h"
|
||||
#include "malloc.h"
|
||||
#include "node.h"
|
||||
|
||||
node *
|
||||
newnode(type)
|
||||
int type;
|
||||
{
|
||||
node *n = NEW(node, 1);
|
||||
if (n == NULL)
|
||||
return NULL;
|
||||
n->n_type = type;
|
||||
n->n_str = NULL;
|
||||
n->n_nchildren = 0;
|
||||
n->n_child = NULL;
|
||||
return n;
|
||||
}
|
||||
|
||||
#define XXX 3 /* Node alignment factor to speed up realloc */
|
||||
#define XXXROUNDUP(n) ((n) == 1 ? 1 : ((n) + XXX - 1) / XXX * XXX)
|
||||
|
||||
node *
|
||||
addchild(n1, type, str)
|
||||
register node *n1;
|
||||
int type;
|
||||
char *str;
|
||||
{
|
||||
register int nch = n1->n_nchildren;
|
||||
register int nch1 = nch+1;
|
||||
register node *n;
|
||||
if (XXXROUNDUP(nch) < nch1) {
|
||||
n = n1->n_child;
|
||||
nch1 = XXXROUNDUP(nch1);
|
||||
RESIZE(n, node, nch1);
|
||||
if (n == NULL)
|
||||
return NULL;
|
||||
n1->n_child = n;
|
||||
}
|
||||
n = &n1->n_child[n1->n_nchildren++];
|
||||
n->n_type = type;
|
||||
n->n_str = str;
|
||||
n->n_nchildren = 0;
|
||||
n->n_child = NULL;
|
||||
return n;
|
||||
}
|
396
Parser/parser.c
Normal file
396
Parser/parser.c
Normal file
|
@ -0,0 +1,396 @@
|
|||
/* Parser implementation */
|
||||
|
||||
/* For a description, see the comments at end of this file */
|
||||
|
||||
/* XXX To do: error recovery */
|
||||
|
||||
#include <stdio.h>
|
||||
#include "assert.h"
|
||||
|
||||
#include "PROTO.h"
|
||||
#include "malloc.h"
|
||||
#include "token.h"
|
||||
#include "grammar.h"
|
||||
#include "node.h"
|
||||
#include "parser.h"
|
||||
#include "errcode.h"
|
||||
|
||||
extern int debugging;
|
||||
|
||||
#ifdef DEBUG
|
||||
#define D(x) if (!debugging); else x
|
||||
#else
|
||||
#define D(x)
|
||||
#endif
|
||||
|
||||
|
||||
/* STACK DATA TYPE */
|
||||
|
||||
static void s_reset PROTO((stack *));
|
||||
|
||||
static void
|
||||
s_reset(s)
|
||||
stack *s;
|
||||
{
|
||||
s->s_top = &s->s_base[MAXSTACK];
|
||||
}
|
||||
|
||||
#define s_empty(s) ((s)->s_top == &(s)->s_base[MAXSTACK])
|
||||
|
||||
static int s_push PROTO((stack *, dfa *, node *));
|
||||
|
||||
static int
|
||||
s_push(s, d, parent)
|
||||
register stack *s;
|
||||
dfa *d;
|
||||
node *parent;
|
||||
{
|
||||
register stackentry *top;
|
||||
if (s->s_top == s->s_base) {
|
||||
fprintf(stderr, "s_push: parser stack overflow\n");
|
||||
return -1;
|
||||
}
|
||||
top = --s->s_top;
|
||||
top->s_dfa = d;
|
||||
top->s_parent = parent;
|
||||
top->s_state = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef DEBUG
|
||||
|
||||
static void s_pop PROTO((stack *));
|
||||
|
||||
static void
|
||||
s_pop(s)
|
||||
register stack *s;
|
||||
{
|
||||
if (s_empty(s)) {
|
||||
fprintf(stderr, "s_pop: parser stack underflow -- FATAL\n");
|
||||
abort();
|
||||
}
|
||||
s->s_top++;
|
||||
}
|
||||
|
||||
#else /* !DEBUG */
|
||||
|
||||
#define s_pop(s) (s)->s_top++
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/* PARSER CREATION */
|
||||
|
||||
parser_state *
|
||||
newparser(g, start)
|
||||
grammar *g;
|
||||
int start;
|
||||
{
|
||||
parser_state *ps;
|
||||
|
||||
if (!g->g_accel)
|
||||
addaccelerators(g);
|
||||
ps = NEW(parser_state, 1);
|
||||
if (ps == NULL)
|
||||
return NULL;
|
||||
ps->p_grammar = g;
|
||||
ps->p_tree = newnode(start);
|
||||
if (ps->p_tree == NULL) {
|
||||
if (ps->p_tree != NULL)
|
||||
DEL(ps->p_tree); /* XXX freeing a node!?! */
|
||||
DEL(ps);
|
||||
return NULL;
|
||||
}
|
||||
s_reset(&ps->p_stack);
|
||||
(void) s_push(&ps->p_stack, finddfa(g, start), ps->p_tree);
|
||||
return ps;
|
||||
}
|
||||
|
||||
void
|
||||
delparser(ps)
|
||||
parser_state *ps;
|
||||
{
|
||||
DEL(ps);
|
||||
}
|
||||
|
||||
|
||||
/* PARSER STACK OPERATIONS */
|
||||
|
||||
static int shift PROTO((stack *, int, char *, int));
|
||||
|
||||
static int
|
||||
shift(s, type, str, newstate)
|
||||
register stack *s;
|
||||
int type;
|
||||
char *str;
|
||||
int newstate;
|
||||
{
|
||||
assert(!s_empty(s));
|
||||
if (addchild(s->s_top->s_parent, type, str) == NULL) {
|
||||
fprintf(stderr, "shift: no mem in addchild\n");
|
||||
return -1;
|
||||
}
|
||||
s->s_top->s_state = newstate;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int push PROTO((stack *, int, dfa *, int));
|
||||
|
||||
static int
|
||||
push(s, type, d, newstate)
|
||||
register stack *s;
|
||||
int type;
|
||||
dfa *d;
|
||||
int newstate;
|
||||
{
|
||||
register node *n;
|
||||
n = s->s_top->s_parent;
|
||||
assert(!s_empty(s));
|
||||
if (addchild(n, type, (char *)NULL) == NULL) {
|
||||
fprintf(stderr, "push: no mem in addchild\n");
|
||||
return -1;
|
||||
}
|
||||
s->s_top->s_state = newstate;
|
||||
return s_push(s, d, CHILD(n, NCH(n)-1));
|
||||
}
|
||||
|
||||
|
||||
/* PARSER PROPER */
|
||||
|
||||
static int classify PROTO((grammar *, int, char *));
|
||||
|
||||
static int
|
||||
classify(g, type, str)
|
||||
grammar *g;
|
||||
register int type;
|
||||
char *str;
|
||||
{
|
||||
register int n = g->g_ll.ll_nlabels;
|
||||
|
||||
if (type == NAME) {
|
||||
register char *s = str;
|
||||
register label *l = g->g_ll.ll_label;
|
||||
register int i;
|
||||
for (i = n; i > 0; i--, l++) {
|
||||
if (l->lb_type == NAME && l->lb_str != NULL &&
|
||||
l->lb_str[0] == s[0] &&
|
||||
strcmp(l->lb_str, s) == 0) {
|
||||
D(printf("It's a keyword\n"));
|
||||
return n - i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
register label *l = g->g_ll.ll_label;
|
||||
register int i;
|
||||
for (i = n; i > 0; i--, l++) {
|
||||
if (l->lb_type == type && l->lb_str == NULL) {
|
||||
D(printf("It's a token we know\n"));
|
||||
return n - i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
D(printf("Illegal token\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
int
|
||||
addtoken(ps, type, str)
|
||||
register parser_state *ps;
|
||||
register int type;
|
||||
char *str;
|
||||
{
|
||||
register int ilabel;
|
||||
|
||||
D(printf("Token %s/'%s' ... ", tok_name[type], str));
|
||||
|
||||
/* Find out which label this token is */
|
||||
ilabel = classify(ps->p_grammar, type, str);
|
||||
if (ilabel < 0)
|
||||
return E_SYNTAX;
|
||||
|
||||
/* Loop until the token is shifted or an error occurred */
|
||||
for (;;) {
|
||||
/* Fetch the current dfa and state */
|
||||
register dfa *d = ps->p_stack.s_top->s_dfa;
|
||||
register state *s = &d->d_state[ps->p_stack.s_top->s_state];
|
||||
|
||||
D(printf(" DFA '%s', state %d:",
|
||||
d->d_name, ps->p_stack.s_top->s_state));
|
||||
|
||||
/* Check accelerator */
|
||||
if (s->s_lower <= ilabel && ilabel < s->s_upper) {
|
||||
register int x = s->s_accel[ilabel - s->s_lower];
|
||||
if (x != -1) {
|
||||
if (x & (1<<7)) {
|
||||
/* Push non-terminal */
|
||||
int nt = (x >> 8) + NT_OFFSET;
|
||||
int arrow = x & ((1<<7)-1);
|
||||
dfa *d1 = finddfa(ps->p_grammar, nt);
|
||||
if (push(&ps->p_stack, nt, d1, arrow) < 0) {
|
||||
D(printf(" MemError: push.\n"));
|
||||
return E_NOMEM;
|
||||
}
|
||||
D(printf(" Push ...\n"));
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Shift the token */
|
||||
if (shift(&ps->p_stack, type, str, x) < 0) {
|
||||
D(printf(" MemError: shift.\n"));
|
||||
return E_NOMEM;
|
||||
}
|
||||
D(printf(" Shift.\n"));
|
||||
/* Pop while we are in an accept-only state */
|
||||
while (s = &d->d_state
|
||||
[ps->p_stack.s_top->s_state],
|
||||
s->s_accept && s->s_narcs == 1) {
|
||||
D(printf(" Direct pop.\n"));
|
||||
s_pop(&ps->p_stack);
|
||||
if (s_empty(&ps->p_stack)) {
|
||||
D(printf(" ACCEPT.\n"));
|
||||
return E_DONE;
|
||||
}
|
||||
d = ps->p_stack.s_top->s_dfa;
|
||||
}
|
||||
return E_OK;
|
||||
}
|
||||
}
|
||||
|
||||
if (s->s_accept) {
|
||||
/* Pop this dfa and try again */
|
||||
s_pop(&ps->p_stack);
|
||||
D(printf(" Pop ...\n"));
|
||||
if (s_empty(&ps->p_stack)) {
|
||||
D(printf(" Error: bottom of stack.\n"));
|
||||
return E_SYNTAX;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Stuck, report syntax error */
|
||||
D(printf(" Error.\n"));
|
||||
return E_SYNTAX;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#ifdef DEBUG
|
||||
|
||||
/* DEBUG OUTPUT */
|
||||
|
||||
void
|
||||
dumptree(g, n)
|
||||
grammar *g;
|
||||
node *n;
|
||||
{
|
||||
int i;
|
||||
|
||||
if (n == NULL)
|
||||
printf("NIL");
|
||||
else {
|
||||
label l;
|
||||
l.lb_type = TYPE(n);
|
||||
l.lb_str = TYPE(str);
|
||||
printf("%s", labelrepr(&l));
|
||||
if (ISNONTERMINAL(TYPE(n))) {
|
||||
printf("(");
|
||||
for (i = 0; i < NCH(n); i++) {
|
||||
if (i > 0)
|
||||
printf(",");
|
||||
dumptree(g, CHILD(n, i));
|
||||
}
|
||||
printf(")");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
showtree(g, n)
|
||||
grammar *g;
|
||||
node *n;
|
||||
{
|
||||
int i;
|
||||
|
||||
if (n == NULL)
|
||||
return;
|
||||
if (ISNONTERMINAL(TYPE(n))) {
|
||||
for (i = 0; i < NCH(n); i++)
|
||||
showtree(g, CHILD(n, i));
|
||||
}
|
||||
else if (ISTERMINAL(TYPE(n))) {
|
||||
printf("%s", tok_name[TYPE(n)]);
|
||||
if (TYPE(n) == NUMBER || TYPE(n) == NAME)
|
||||
printf("(%s)", STR(n));
|
||||
printf(" ");
|
||||
}
|
||||
else
|
||||
printf("? ");
|
||||
}
|
||||
|
||||
void
|
||||
printtree(ps)
|
||||
parser_state *ps;
|
||||
{
|
||||
if (debugging) {
|
||||
printf("Parse tree:\n");
|
||||
dumptree(ps->p_grammar, ps->p_tree);
|
||||
printf("\n");
|
||||
printf("Tokens:\n");
|
||||
showtree(ps->p_grammar, ps->p_tree);
|
||||
printf("\n");
|
||||
}
|
||||
printf("Listing:\n");
|
||||
listtree(ps->p_tree);
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
#endif /* DEBUG */
|
||||
|
||||
/*
|
||||
|
||||
Description
|
||||
-----------
|
||||
|
||||
The parser's interface is different than usual: the function addtoken()
|
||||
must be called for each token in the input. This makes it possible to
|
||||
turn it into an incremental parsing system later. The parsing system
|
||||
constructs a parse tree as it goes.
|
||||
|
||||
A parsing rule is represented as a Deterministic Finite-state Automaton
|
||||
(DFA). A node in a DFA represents a state of the parser; an arc represents
|
||||
a transition. Transitions are either labeled with terminal symbols or
|
||||
with non-terminals. When the parser decides to follow an arc labeled
|
||||
with a non-terminal, it is invoked recursively with the DFA representing
|
||||
the parsing rule for that as its initial state; when that DFA accepts,
|
||||
the parser that invoked it continues. The parse tree constructed by the
|
||||
recursively called parser is inserted as a child in the current parse tree.
|
||||
|
||||
The DFA's can be constructed automatically from a more conventional
|
||||
language description. An extended LL(1) grammar (ELL(1)) is suitable.
|
||||
Certain restrictions make the parser's life easier: rules that can produce
|
||||
the empty string should be outlawed (there are other ways to put loops
|
||||
or optional parts in the language). To avoid the need to construct
|
||||
FIRST sets, we can require that all but the last alternative of a rule
|
||||
(really: arc going out of a DFA's state) must begin with a terminal
|
||||
symbol.
|
||||
|
||||
As an example, consider this grammar:
|
||||
|
||||
expr: term (OP term)*
|
||||
term: CONSTANT | '(' expr ')'
|
||||
|
||||
The DFA corresponding to the rule for expr is:
|
||||
|
||||
------->.---term-->.------->
|
||||
^ |
|
||||
| |
|
||||
\----OP----/
|
||||
|
||||
The parse tree generated for the input a+b is:
|
||||
|
||||
(expr: (term: (NAME: a)), (OP: +), (term: (NAME: b)))
|
||||
|
||||
*/
|
25
Parser/parser.h
Normal file
25
Parser/parser.h
Normal file
|
@ -0,0 +1,25 @@
|
|||
/* Parser interface */
|
||||
|
||||
#define MAXSTACK 100
|
||||
|
||||
typedef struct _stackentry {
|
||||
int s_state; /* State in current DFA */
|
||||
dfa *s_dfa; /* Current DFA */
|
||||
node *s_parent; /* Where to add next node */
|
||||
} stackentry;
|
||||
|
||||
typedef struct _stack {
|
||||
stackentry *s_top; /* Top entry */
|
||||
stackentry s_base[MAXSTACK];/* Array of stack entries */
|
||||
/* NB The stack grows down */
|
||||
} stack;
|
||||
|
||||
typedef struct {
|
||||
struct _stack p_stack; /* Stack of parser states */
|
||||
struct _grammar *p_grammar; /* Grammar to use */
|
||||
struct _node *p_tree; /* Top of parse tree */
|
||||
} parser_state;
|
||||
|
||||
parser_state *newparser PROTO((struct _grammar *g, int start));
|
||||
void delparser PROTO((parser_state *ps));
|
||||
int addtoken PROTO((parser_state *ps, int type, char *str));
|
131
Parser/parsetok.c
Normal file
131
Parser/parsetok.c
Normal file
|
@ -0,0 +1,131 @@
|
|||
/* Parser-tokenizer link implementation */
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include "PROTO.h"
|
||||
#include "malloc.h"
|
||||
#include "tokenizer.h"
|
||||
#include "node.h"
|
||||
#include "grammar.h"
|
||||
#include "parser.h"
|
||||
#include "errcode.h"
|
||||
|
||||
extern int debugging;
|
||||
|
||||
|
||||
/* Parse input coming from the given tokenizer structure.
|
||||
Return error code. */
|
||||
|
||||
static int
|
||||
parsetok(tok, g, start, n_ret)
|
||||
struct tok_state *tok;
|
||||
grammar *g;
|
||||
int start;
|
||||
node **n_ret;
|
||||
{
|
||||
parser_state *ps;
|
||||
int ret;
|
||||
|
||||
if ((ps = newparser(g, start)) == NULL) {
|
||||
fprintf(stderr, "no mem for new parser\n");
|
||||
return E_NOMEM;
|
||||
}
|
||||
|
||||
for (;;) {
|
||||
char *a, *b;
|
||||
int type;
|
||||
int len;
|
||||
char *str;
|
||||
|
||||
type = tok_get(tok, &a, &b);
|
||||
if (type == ERRORTOKEN) {
|
||||
ret = tok->done;
|
||||
break;
|
||||
}
|
||||
len = b - a;
|
||||
str = NEW(char, len + 1);
|
||||
if (str == NULL) {
|
||||
fprintf(stderr, "no mem for next token\n");
|
||||
ret = E_NOMEM;
|
||||
break;
|
||||
}
|
||||
strncpy(str, a, len);
|
||||
str[len] = '\0';
|
||||
ret = addtoken(ps, (int)type, str);
|
||||
if (ret != E_OK) {
|
||||
if (ret == E_DONE)
|
||||
*n_ret = ps->p_tree;
|
||||
else if (tok->lineno <= 1 && tok->done == E_EOF)
|
||||
ret = E_EOF;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
delparser(ps);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
/* Parse input coming from a string. Return error code. */
|
||||
|
||||
int
|
||||
parsestring(s, g, start, n_ret)
|
||||
char *s;
|
||||
grammar *g;
|
||||
int start;
|
||||
node **n_ret;
|
||||
{
|
||||
struct tok_state *tok = tok_setups(s);
|
||||
int ret;
|
||||
|
||||
if (tok == NULL) {
|
||||
fprintf(stderr, "no mem for tok_setups\n");
|
||||
return E_NOMEM;
|
||||
}
|
||||
ret = parsetok(tok, g, start, n_ret);
|
||||
if (ret == E_TOKEN || ret == E_SYNTAX) {
|
||||
fprintf(stderr, "String parsing error at line %d\n",
|
||||
tok->lineno);
|
||||
}
|
||||
tok_free(tok);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
/* Parse input coming from a file. Return error code. */
|
||||
|
||||
int
|
||||
parsefile(fp, g, start, ps1, ps2, n_ret)
|
||||
FILE *fp;
|
||||
grammar *g;
|
||||
int start;
|
||||
char *ps1, *ps2;
|
||||
node **n_ret;
|
||||
{
|
||||
struct tok_state *tok = tok_setupf(fp, ps1, ps2);
|
||||
int ret;
|
||||
|
||||
if (tok == NULL) {
|
||||
fprintf(stderr, "no mem for tok_setupf\n");
|
||||
return E_NOMEM;
|
||||
}
|
||||
ret = parsetok(tok, g, start, n_ret);
|
||||
if (ret == E_TOKEN || ret == E_SYNTAX) {
|
||||
char *p;
|
||||
fprintf(stderr, "Parsing error at line %d:\n",
|
||||
tok->lineno);
|
||||
*tok->inp = '\0';
|
||||
if (tok->inp > tok->buf && tok->inp[-1] == '\n')
|
||||
tok->inp[-1] = '\0';
|
||||
fprintf(stderr, "%s\n", tok->buf);
|
||||
for (p = tok->buf; p < tok->cur; p++) {
|
||||
if (*p == '\t')
|
||||
putc('\t', stderr);
|
||||
else
|
||||
putc(' ', stderr);
|
||||
}
|
||||
fprintf(stderr, "^\n");
|
||||
}
|
||||
tok_free(tok);
|
||||
return ret;
|
||||
}
|
729
Parser/pgen.c
Normal file
729
Parser/pgen.c
Normal file
|
@ -0,0 +1,729 @@
|
|||
/* Parser generator */
|
||||
|
||||
/* For a description, see the comments at end of this file */
|
||||
|
||||
#include <stdio.h>
|
||||
#include "assert.h"
|
||||
|
||||
#include "PROTO.h"
|
||||
#include "malloc.h"
|
||||
#include "token.h"
|
||||
#include "node.h"
|
||||
#include "grammar.h"
|
||||
#include "metagrammar.h"
|
||||
#include "pgen.h"
|
||||
|
||||
extern int debugging;
|
||||
|
||||
|
||||
/* PART ONE -- CONSTRUCT NFA -- Cf. Algorithm 3.2 from [Aho&Ullman 77] */
|
||||
|
||||
typedef struct _nfaarc {
|
||||
int ar_label;
|
||||
int ar_arrow;
|
||||
} nfaarc;
|
||||
|
||||
typedef struct _nfastate {
|
||||
int st_narcs;
|
||||
nfaarc *st_arc;
|
||||
} nfastate;
|
||||
|
||||
typedef struct _nfa {
|
||||
int nf_type;
|
||||
char *nf_name;
|
||||
int nf_nstates;
|
||||
nfastate *nf_state;
|
||||
int nf_start, nf_finish;
|
||||
} nfa;
|
||||
|
||||
static int
|
||||
addnfastate(nf)
|
||||
nfa *nf;
|
||||
{
|
||||
nfastate *st;
|
||||
|
||||
RESIZE(nf->nf_state, nfastate, nf->nf_nstates + 1);
|
||||
if (nf->nf_state == NULL)
|
||||
fatal("out of mem");
|
||||
st = &nf->nf_state[nf->nf_nstates++];
|
||||
st->st_narcs = 0;
|
||||
st->st_arc = NULL;
|
||||
return st - nf->nf_state;
|
||||
}
|
||||
|
||||
static void
|
||||
addnfaarc(nf, from, to, lbl)
|
||||
nfa *nf;
|
||||
int from, to, lbl;
|
||||
{
|
||||
nfastate *st;
|
||||
nfaarc *ar;
|
||||
|
||||
st = &nf->nf_state[from];
|
||||
RESIZE(st->st_arc, nfaarc, st->st_narcs + 1);
|
||||
if (st->st_arc == NULL)
|
||||
fatal("out of mem");
|
||||
ar = &st->st_arc[st->st_narcs++];
|
||||
ar->ar_label = lbl;
|
||||
ar->ar_arrow = to;
|
||||
}
|
||||
|
||||
static nfa *
|
||||
newnfa(name)
|
||||
char *name;
|
||||
{
|
||||
nfa *nf;
|
||||
static type = NT_OFFSET; /* All types will be disjunct */
|
||||
|
||||
nf = NEW(nfa, 1);
|
||||
if (nf == NULL)
|
||||
fatal("no mem for new nfa");
|
||||
nf->nf_type = type++;
|
||||
nf->nf_name = name; /* XXX strdup(name) ??? */
|
||||
nf->nf_nstates = 0;
|
||||
nf->nf_state = NULL;
|
||||
nf->nf_start = nf->nf_finish = -1;
|
||||
return nf;
|
||||
}
|
||||
|
||||
typedef struct _nfagrammar {
|
||||
int gr_nnfas;
|
||||
nfa **gr_nfa;
|
||||
labellist gr_ll;
|
||||
} nfagrammar;
|
||||
|
||||
static nfagrammar *
|
||||
newnfagrammar()
|
||||
{
|
||||
nfagrammar *gr;
|
||||
|
||||
gr = NEW(nfagrammar, 1);
|
||||
if (gr == NULL)
|
||||
fatal("no mem for new nfa grammar");
|
||||
gr->gr_nnfas = 0;
|
||||
gr->gr_nfa = NULL;
|
||||
gr->gr_ll.ll_nlabels = 0;
|
||||
gr->gr_ll.ll_label = NULL;
|
||||
addlabel(&gr->gr_ll, ENDMARKER, "EMPTY");
|
||||
return gr;
|
||||
}
|
||||
|
||||
static nfa *
|
||||
addnfa(gr, name)
|
||||
nfagrammar *gr;
|
||||
char *name;
|
||||
{
|
||||
nfa *nf;
|
||||
|
||||
nf = newnfa(name);
|
||||
RESIZE(gr->gr_nfa, nfa *, gr->gr_nnfas + 1);
|
||||
if (gr->gr_nfa == NULL)
|
||||
fatal("out of mem");
|
||||
gr->gr_nfa[gr->gr_nnfas++] = nf;
|
||||
addlabel(&gr->gr_ll, NAME, nf->nf_name);
|
||||
return nf;
|
||||
}
|
||||
|
||||
#ifdef DEBUG
|
||||
|
||||
static char REQNFMT[] = "metacompile: less than %d children\n";
|
||||
|
||||
#define REQN(i, count) \
|
||||
if (i < count) { \
|
||||
fprintf(stderr, REQNFMT, count); \
|
||||
abort(); \
|
||||
} else
|
||||
|
||||
#else
|
||||
#define REQN(i, count) /* empty */
|
||||
#endif
|
||||
|
||||
static nfagrammar *
|
||||
metacompile(n)
|
||||
node *n;
|
||||
{
|
||||
nfagrammar *gr;
|
||||
int i;
|
||||
|
||||
printf("Compiling (meta-) parse tree into NFA grammar\n");
|
||||
gr = newnfagrammar();
|
||||
REQ(n, MSTART);
|
||||
i = n->n_nchildren - 1; /* Last child is ENDMARKER */
|
||||
n = n->n_child;
|
||||
for (; --i >= 0; n++) {
|
||||
if (n->n_type != NEWLINE)
|
||||
compile_rule(gr, n);
|
||||
}
|
||||
return gr;
|
||||
}
|
||||
|
||||
static
|
||||
compile_rule(gr, n)
|
||||
nfagrammar *gr;
|
||||
node *n;
|
||||
{
|
||||
nfa *nf;
|
||||
|
||||
REQ(n, RULE);
|
||||
REQN(n->n_nchildren, 4);
|
||||
n = n->n_child;
|
||||
REQ(n, NAME);
|
||||
nf = addnfa(gr, n->n_str);
|
||||
n++;
|
||||
REQ(n, COLON);
|
||||
n++;
|
||||
REQ(n, RHS);
|
||||
compile_rhs(&gr->gr_ll, nf, n, &nf->nf_start, &nf->nf_finish);
|
||||
n++;
|
||||
REQ(n, NEWLINE);
|
||||
}
|
||||
|
||||
static
|
||||
compile_rhs(ll, nf, n, pa, pb)
|
||||
labellist *ll;
|
||||
nfa *nf;
|
||||
node *n;
|
||||
int *pa, *pb;
|
||||
{
|
||||
int i;
|
||||
int a, b;
|
||||
|
||||
REQ(n, RHS);
|
||||
i = n->n_nchildren;
|
||||
REQN(i, 1);
|
||||
n = n->n_child;
|
||||
REQ(n, ALT);
|
||||
compile_alt(ll, nf, n, pa, pb);
|
||||
if (--i <= 0)
|
||||
return;
|
||||
n++;
|
||||
a = *pa;
|
||||
b = *pb;
|
||||
*pa = addnfastate(nf);
|
||||
*pb = addnfastate(nf);
|
||||
addnfaarc(nf, *pa, a, EMPTY);
|
||||
addnfaarc(nf, b, *pb, EMPTY);
|
||||
for (; --i >= 0; n++) {
|
||||
REQ(n, VBAR);
|
||||
REQN(i, 1);
|
||||
--i;
|
||||
n++;
|
||||
REQ(n, ALT);
|
||||
compile_alt(ll, nf, n, &a, &b);
|
||||
addnfaarc(nf, *pa, a, EMPTY);
|
||||
addnfaarc(nf, b, *pb, EMPTY);
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
compile_alt(ll, nf, n, pa, pb)
|
||||
labellist *ll;
|
||||
nfa *nf;
|
||||
node *n;
|
||||
int *pa, *pb;
|
||||
{
|
||||
int i;
|
||||
int a, b;
|
||||
|
||||
REQ(n, ALT);
|
||||
i = n->n_nchildren;
|
||||
REQN(i, 1);
|
||||
n = n->n_child;
|
||||
REQ(n, ITEM);
|
||||
compile_item(ll, nf, n, pa, pb);
|
||||
--i;
|
||||
n++;
|
||||
for (; --i >= 0; n++) {
|
||||
if (n->n_type == COMMA) { /* XXX Temporary */
|
||||
REQN(i, 1);
|
||||
--i;
|
||||
n++;
|
||||
}
|
||||
REQ(n, ITEM);
|
||||
compile_item(ll, nf, n, &a, &b);
|
||||
addnfaarc(nf, *pb, a, EMPTY);
|
||||
*pb = b;
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
compile_item(ll, nf, n, pa, pb)
|
||||
labellist *ll;
|
||||
nfa *nf;
|
||||
node *n;
|
||||
int *pa, *pb;
|
||||
{
|
||||
int i;
|
||||
int a, b;
|
||||
|
||||
REQ(n, ITEM);
|
||||
i = n->n_nchildren;
|
||||
REQN(i, 1);
|
||||
n = n->n_child;
|
||||
if (n->n_type == LSQB) {
|
||||
REQN(i, 3);
|
||||
n++;
|
||||
REQ(n, RHS);
|
||||
*pa = addnfastate(nf);
|
||||
*pb = addnfastate(nf);
|
||||
addnfaarc(nf, *pa, *pb, EMPTY);
|
||||
compile_rhs(ll, nf, n, &a, &b);
|
||||
addnfaarc(nf, *pa, a, EMPTY);
|
||||
addnfaarc(nf, b, *pb, EMPTY);
|
||||
REQN(i, 1);
|
||||
n++;
|
||||
REQ(n, RSQB);
|
||||
}
|
||||
else {
|
||||
compile_atom(ll, nf, n, pa, pb);
|
||||
if (--i <= 0)
|
||||
return;
|
||||
n++;
|
||||
addnfaarc(nf, *pb, *pa, EMPTY);
|
||||
if (n->n_type == STAR)
|
||||
*pb = *pa;
|
||||
else
|
||||
REQ(n, PLUS);
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
compile_atom(ll, nf, n, pa, pb)
|
||||
labellist *ll;
|
||||
nfa *nf;
|
||||
node *n;
|
||||
int *pa, *pb;
|
||||
{
|
||||
int i;
|
||||
|
||||
REQ(n, ATOM);
|
||||
i = n->n_nchildren;
|
||||
REQN(i, 1);
|
||||
n = n->n_child;
|
||||
if (n->n_type == LPAR) {
|
||||
REQN(i, 3);
|
||||
n++;
|
||||
REQ(n, RHS);
|
||||
compile_rhs(ll, nf, n, pa, pb);
|
||||
n++;
|
||||
REQ(n, RPAR);
|
||||
}
|
||||
else if (n->n_type == NAME || n->n_type == STRING) {
|
||||
*pa = addnfastate(nf);
|
||||
*pb = addnfastate(nf);
|
||||
addnfaarc(nf, *pa, *pb, addlabel(ll, n->n_type, n->n_str));
|
||||
}
|
||||
else
|
||||
REQ(n, NAME);
|
||||
}
|
||||
|
||||
static void
|
||||
dumpstate(ll, nf, istate)
|
||||
labellist *ll;
|
||||
nfa *nf;
|
||||
int istate;
|
||||
{
|
||||
nfastate *st;
|
||||
int i;
|
||||
nfaarc *ar;
|
||||
|
||||
printf("%c%2d%c",
|
||||
istate == nf->nf_start ? '*' : ' ',
|
||||
istate,
|
||||
istate == nf->nf_finish ? '.' : ' ');
|
||||
st = &nf->nf_state[istate];
|
||||
ar = st->st_arc;
|
||||
for (i = 0; i < st->st_narcs; i++) {
|
||||
if (i > 0)
|
||||
printf("\n ");
|
||||
printf("-> %2d %s", ar->ar_arrow,
|
||||
labelrepr(&ll->ll_label[ar->ar_label]));
|
||||
ar++;
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
static void
|
||||
dumpnfa(ll, nf)
|
||||
labellist *ll;
|
||||
nfa *nf;
|
||||
{
|
||||
int i;
|
||||
|
||||
printf("NFA '%s' has %d states; start %d, finish %d\n",
|
||||
nf->nf_name, nf->nf_nstates, nf->nf_start, nf->nf_finish);
|
||||
for (i = 0; i < nf->nf_nstates; i++)
|
||||
dumpstate(ll, nf, i);
|
||||
}
|
||||
|
||||
|
||||
/* PART TWO -- CONSTRUCT DFA -- Algorithm 3.1 from [Aho&Ullman 77] */
|
||||
|
||||
static int
|
||||
addclosure(ss, nf, istate)
|
||||
bitset ss;
|
||||
nfa *nf;
|
||||
int istate;
|
||||
{
|
||||
if (addbit(ss, istate)) {
|
||||
nfastate *st = &nf->nf_state[istate];
|
||||
nfaarc *ar = st->st_arc;
|
||||
int i;
|
||||
|
||||
for (i = st->st_narcs; --i >= 0; ) {
|
||||
if (ar->ar_label == EMPTY)
|
||||
addclosure(ss, nf, ar->ar_arrow);
|
||||
ar++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
typedef struct _ss_arc {
|
||||
bitset sa_bitset;
|
||||
int sa_arrow;
|
||||
int sa_label;
|
||||
} ss_arc;
|
||||
|
||||
typedef struct _ss_state {
|
||||
bitset ss_ss;
|
||||
int ss_narcs;
|
||||
ss_arc *ss_arc;
|
||||
int ss_deleted;
|
||||
int ss_finish;
|
||||
int ss_rename;
|
||||
} ss_state;
|
||||
|
||||
typedef struct _ss_dfa {
|
||||
int sd_nstates;
|
||||
ss_state *sd_state;
|
||||
} ss_dfa;
|
||||
|
||||
static
|
||||
makedfa(gr, nf, d)
|
||||
nfagrammar *gr;
|
||||
nfa *nf;
|
||||
dfa *d;
|
||||
{
|
||||
int nbits = nf->nf_nstates;
|
||||
bitset ss;
|
||||
int xx_nstates;
|
||||
ss_state *xx_state, *yy;
|
||||
ss_arc *zz;
|
||||
int istate, jstate, iarc, jarc, ibit;
|
||||
nfastate *st;
|
||||
nfaarc *ar;
|
||||
|
||||
ss = newbitset(nbits);
|
||||
addclosure(ss, nf, nf->nf_start);
|
||||
xx_state = NEW(ss_state, 1);
|
||||
if (xx_state == NULL)
|
||||
fatal("no mem for xx_state in makedfa");
|
||||
xx_nstates = 1;
|
||||
yy = &xx_state[0];
|
||||
yy->ss_ss = ss;
|
||||
yy->ss_narcs = 0;
|
||||
yy->ss_arc = NULL;
|
||||
yy->ss_deleted = 0;
|
||||
yy->ss_finish = testbit(ss, nf->nf_finish);
|
||||
if (yy->ss_finish)
|
||||
printf("Error: nonterminal '%s' may produce empty.\n",
|
||||
nf->nf_name);
|
||||
|
||||
/* This algorithm is from a book written before
|
||||
the invention of structured programming... */
|
||||
|
||||
/* For each unmarked state... */
|
||||
for (istate = 0; istate < xx_nstates; ++istate) {
|
||||
yy = &xx_state[istate];
|
||||
ss = yy->ss_ss;
|
||||
/* For all its states... */
|
||||
for (ibit = 0; ibit < nf->nf_nstates; ++ibit) {
|
||||
if (!testbit(ss, ibit))
|
||||
continue;
|
||||
st = &nf->nf_state[ibit];
|
||||
/* For all non-empty arcs from this state... */
|
||||
for (iarc = 0; iarc < st->st_narcs; iarc++) {
|
||||
ar = &st->st_arc[iarc];
|
||||
if (ar->ar_label == EMPTY)
|
||||
continue;
|
||||
/* Look up in list of arcs from this state */
|
||||
for (jarc = 0; jarc < yy->ss_narcs; ++jarc) {
|
||||
zz = &yy->ss_arc[jarc];
|
||||
if (ar->ar_label == zz->sa_label)
|
||||
goto found;
|
||||
}
|
||||
/* Add new arc for this state */
|
||||
RESIZE(yy->ss_arc, ss_arc, yy->ss_narcs + 1);
|
||||
if (yy->ss_arc == NULL)
|
||||
fatal("out of mem");
|
||||
zz = &yy->ss_arc[yy->ss_narcs++];
|
||||
zz->sa_label = ar->ar_label;
|
||||
zz->sa_bitset = newbitset(nbits);
|
||||
zz->sa_arrow = -1;
|
||||
found: ;
|
||||
/* Add destination */
|
||||
addclosure(zz->sa_bitset, nf, ar->ar_arrow);
|
||||
}
|
||||
}
|
||||
/* Now look up all the arrow states */
|
||||
for (jarc = 0; jarc < xx_state[istate].ss_narcs; jarc++) {
|
||||
zz = &xx_state[istate].ss_arc[jarc];
|
||||
for (jstate = 0; jstate < xx_nstates; jstate++) {
|
||||
if (samebitset(zz->sa_bitset,
|
||||
xx_state[jstate].ss_ss, nbits)) {
|
||||
zz->sa_arrow = jstate;
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
RESIZE(xx_state, ss_state, xx_nstates + 1);
|
||||
if (xx_state == NULL)
|
||||
fatal("out of mem");
|
||||
zz->sa_arrow = xx_nstates;
|
||||
yy = &xx_state[xx_nstates++];
|
||||
yy->ss_ss = zz->sa_bitset;
|
||||
yy->ss_narcs = 0;
|
||||
yy->ss_arc = NULL;
|
||||
yy->ss_deleted = 0;
|
||||
yy->ss_finish = testbit(yy->ss_ss, nf->nf_finish);
|
||||
done: ;
|
||||
}
|
||||
}
|
||||
|
||||
if (debugging)
|
||||
printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll,
|
||||
"before minimizing");
|
||||
|
||||
simplify(xx_nstates, xx_state);
|
||||
|
||||
if (debugging)
|
||||
printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll,
|
||||
"after minimizing");
|
||||
|
||||
convert(d, xx_nstates, xx_state);
|
||||
|
||||
/* XXX cleanup */
|
||||
}
|
||||
|
||||
static
|
||||
printssdfa(xx_nstates, xx_state, nbits, ll, msg)
|
||||
int xx_nstates;
|
||||
ss_state *xx_state;
|
||||
int nbits;
|
||||
labellist *ll;
|
||||
char *msg;
|
||||
{
|
||||
int i, ibit, iarc;
|
||||
ss_state *yy;
|
||||
ss_arc *zz;
|
||||
|
||||
printf("Subset DFA %s\n", msg);
|
||||
for (i = 0; i < xx_nstates; i++) {
|
||||
yy = &xx_state[i];
|
||||
if (yy->ss_deleted)
|
||||
continue;
|
||||
printf(" Subset %d", i);
|
||||
if (yy->ss_finish)
|
||||
printf(" (finish)");
|
||||
printf(" { ");
|
||||
for (ibit = 0; ibit < nbits; ibit++) {
|
||||
if (testbit(yy->ss_ss, ibit))
|
||||
printf("%d ", ibit);
|
||||
}
|
||||
printf("}\n");
|
||||
for (iarc = 0; iarc < yy->ss_narcs; iarc++) {
|
||||
zz = &yy->ss_arc[iarc];
|
||||
printf(" Arc to state %d, label %s\n",
|
||||
zz->sa_arrow,
|
||||
labelrepr(&ll->ll_label[zz->sa_label]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* PART THREE -- SIMPLIFY DFA */
|
||||
|
||||
/* Simplify the DFA by repeatedly eliminating states that are
|
||||
equivalent to another oner. This is NOT Algorithm 3.3 from
|
||||
[Aho&Ullman 77]. It does not always finds the minimal DFA,
|
||||
but it does usually make a much smaller one... (For an example
|
||||
of sub-optimal behaviour, try S: x a b+ | y a b+.)
|
||||
*/
|
||||
|
||||
static int
|
||||
samestate(s1, s2)
|
||||
ss_state *s1, *s2;
|
||||
{
|
||||
int i;
|
||||
|
||||
if (s1->ss_narcs != s2->ss_narcs || s1->ss_finish != s2->ss_finish)
|
||||
return 0;
|
||||
for (i = 0; i < s1->ss_narcs; i++) {
|
||||
if (s1->ss_arc[i].sa_arrow != s2->ss_arc[i].sa_arrow ||
|
||||
s1->ss_arc[i].sa_label != s2->ss_arc[i].sa_label)
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void
|
||||
renamestates(xx_nstates, xx_state, from, to)
|
||||
int xx_nstates;
|
||||
ss_state *xx_state;
|
||||
int from, to;
|
||||
{
|
||||
int i, j;
|
||||
|
||||
if (debugging)
|
||||
printf("Rename state %d to %d.\n", from, to);
|
||||
for (i = 0; i < xx_nstates; i++) {
|
||||
if (xx_state[i].ss_deleted)
|
||||
continue;
|
||||
for (j = 0; j < xx_state[i].ss_narcs; j++) {
|
||||
if (xx_state[i].ss_arc[j].sa_arrow == from)
|
||||
xx_state[i].ss_arc[j].sa_arrow = to;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
simplify(xx_nstates, xx_state)
|
||||
int xx_nstates;
|
||||
ss_state *xx_state;
|
||||
{
|
||||
int changes;
|
||||
int i, j, k;
|
||||
|
||||
do {
|
||||
changes = 0;
|
||||
for (i = 1; i < xx_nstates; i++) {
|
||||
if (xx_state[i].ss_deleted)
|
||||
continue;
|
||||
for (j = 0; j < i; j++) {
|
||||
if (xx_state[j].ss_deleted)
|
||||
continue;
|
||||
if (samestate(&xx_state[i], &xx_state[j])) {
|
||||
xx_state[i].ss_deleted++;
|
||||
renamestates(xx_nstates, xx_state, i, j);
|
||||
changes++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
} while (changes);
|
||||
}
|
||||
|
||||
|
||||
/* PART FOUR -- GENERATE PARSING TABLES */
|
||||
|
||||
/* Convert the DFA into a grammar that can be used by our parser */
|
||||
|
||||
static
|
||||
convert(d, xx_nstates, xx_state)
|
||||
dfa *d;
|
||||
int xx_nstates;
|
||||
ss_state *xx_state;
|
||||
{
|
||||
int i, j;
|
||||
ss_state *yy;
|
||||
ss_arc *zz;
|
||||
|
||||
for (i = 0; i < xx_nstates; i++) {
|
||||
yy = &xx_state[i];
|
||||
if (yy->ss_deleted)
|
||||
continue;
|
||||
yy->ss_rename = addstate(d);
|
||||
}
|
||||
|
||||
for (i = 0; i < xx_nstates; i++) {
|
||||
yy = &xx_state[i];
|
||||
if (yy->ss_deleted)
|
||||
continue;
|
||||
for (j = 0; j < yy->ss_narcs; j++) {
|
||||
zz = &yy->ss_arc[j];
|
||||
addarc(d, yy->ss_rename,
|
||||
xx_state[zz->sa_arrow].ss_rename,
|
||||
zz->sa_label);
|
||||
}
|
||||
if (yy->ss_finish)
|
||||
addarc(d, yy->ss_rename, yy->ss_rename, 0);
|
||||
}
|
||||
|
||||
d->d_initial = 0;
|
||||
}
|
||||
|
||||
|
||||
/* PART FIVE -- GLUE IT ALL TOGETHER */
|
||||
|
||||
static grammar *
|
||||
maketables(gr)
|
||||
nfagrammar *gr;
|
||||
{
|
||||
int i;
|
||||
nfa *nf;
|
||||
dfa *d;
|
||||
grammar *g;
|
||||
|
||||
if (gr->gr_nnfas == 0)
|
||||
return NULL;
|
||||
g = newgrammar(gr->gr_nfa[0]->nf_type);
|
||||
/* XXX first rule must be start rule */
|
||||
g->g_ll = gr->gr_ll;
|
||||
|
||||
for (i = 0; i < gr->gr_nnfas; i++) {
|
||||
nf = gr->gr_nfa[i];
|
||||
if (debugging) {
|
||||
printf("Dump of NFA for '%s' ...\n", nf->nf_name);
|
||||
dumpnfa(&gr->gr_ll, nf);
|
||||
}
|
||||
printf("Making DFA for '%s' ...\n", nf->nf_name);
|
||||
d = adddfa(g, nf->nf_type, nf->nf_name);
|
||||
makedfa(gr, gr->gr_nfa[i], d);
|
||||
}
|
||||
|
||||
return g;
|
||||
}
|
||||
|
||||
grammar *
|
||||
pgen(n)
|
||||
node *n;
|
||||
{
|
||||
nfagrammar *gr;
|
||||
grammar *g;
|
||||
|
||||
gr = metacompile(n);
|
||||
g = maketables(gr);
|
||||
translatelabels(g);
|
||||
addfirstsets(g);
|
||||
return g;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
|
||||
Description
|
||||
-----------
|
||||
|
||||
Input is a grammar in extended BNF (using * for repetition, + for
|
||||
at-least-once repetition, [] for optional parts, | for alternatives and
|
||||
() for grouping). This has already been parsed and turned into a parse
|
||||
tree.
|
||||
|
||||
Each rule is considered as a regular expression in its own right.
|
||||
It is turned into a Non-deterministic Finite Automaton (NFA), which
|
||||
is then turned into a Deterministic Finite Automaton (DFA), which is then
|
||||
optimized to reduce the number of states. See [Aho&Ullman 77] chapter 3,
|
||||
or similar compiler books (this technique is more often used for lexical
|
||||
analyzers).
|
||||
|
||||
The DFA's are used by the parser as parsing tables in a special way
|
||||
that's probably unique. Before they are usable, the FIRST sets of all
|
||||
non-terminals are computed.
|
||||
|
||||
Reference
|
||||
---------
|
||||
|
||||
[Aho&Ullman 77]
|
||||
Aho&Ullman, Principles of Compiler Design, Addison-Wesley 1977
|
||||
(first edition)
|
||||
|
||||
*/
|
6
Parser/pgen.h
Normal file
6
Parser/pgen.h
Normal file
|
@ -0,0 +1,6 @@
|
|||
/* Parser generator interface */
|
||||
|
||||
extern grammar gram;
|
||||
|
||||
extern grammar *meta_grammar PROTO((void));
|
||||
extern grammar *pgen PROTO((node *));
|
111
Parser/pgenmain.c
Normal file
111
Parser/pgenmain.c
Normal file
|
@ -0,0 +1,111 @@
|
|||
/* Parser generator main program */
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include "PROTO.h"
|
||||
#include "grammar.h"
|
||||
#include "node.h"
|
||||
#include "parsetok.h"
|
||||
#include "pgen.h"
|
||||
|
||||
int debugging;
|
||||
|
||||
#ifdef THINK_C
|
||||
char *
|
||||
askfile()
|
||||
{
|
||||
char buf[256];
|
||||
static char name[256];
|
||||
printf("Input file name: ");
|
||||
if (fgets(buf, sizeof buf, stdin) == NULL) {
|
||||
printf("EOF\n");
|
||||
exit(1);
|
||||
}
|
||||
if (sscanf(buf, " %s ", name) != 1) {
|
||||
printf("No file\n");
|
||||
exit(1);
|
||||
}
|
||||
return name;
|
||||
}
|
||||
#endif
|
||||
|
||||
grammar *
|
||||
getgrammar(filename)
|
||||
char *filename;
|
||||
{
|
||||
FILE *fp;
|
||||
node *n;
|
||||
grammar *g0, *g;
|
||||
|
||||
fp = fopen(filename, "r");
|
||||
if (fp == NULL) {
|
||||
perror(filename);
|
||||
exit(1);
|
||||
}
|
||||
g0 = meta_grammar();
|
||||
n = NULL;
|
||||
parsefile(fp, g0, g0->g_start, (char *)NULL, (char *)NULL, &n);
|
||||
fclose(fp);
|
||||
if (n == NULL) {
|
||||
fprintf(stderr, "Parsing error.\n");
|
||||
exit(1);
|
||||
}
|
||||
g = pgen(n);
|
||||
if (g == NULL) {
|
||||
printf("Bad grammar.\n");
|
||||
exit(1);
|
||||
}
|
||||
return g;
|
||||
}
|
||||
|
||||
main(argc, argv)
|
||||
int argc;
|
||||
char **argv;
|
||||
{
|
||||
grammar *g;
|
||||
node *n;
|
||||
FILE *fp;
|
||||
char *filename;
|
||||
|
||||
#ifdef THINK_C
|
||||
filename = askfile();
|
||||
#else
|
||||
if (argc != 2) {
|
||||
fprintf(stderr, "usage: %s grammar\n", argv[0]);
|
||||
exit(2);
|
||||
}
|
||||
filename = argv[1];
|
||||
#endif
|
||||
g = getgrammar(filename);
|
||||
fp = fopen("graminit.c", "w");
|
||||
if (fp == NULL) {
|
||||
perror("graminit.c");
|
||||
exit(1);
|
||||
}
|
||||
printf("Writing graminit.c ...\n");
|
||||
printgrammar(g, fp);
|
||||
fclose(fp);
|
||||
fp = fopen("graminit.h", "w");
|
||||
if (fp == NULL) {
|
||||
perror("graminit.h");
|
||||
exit(1);
|
||||
}
|
||||
printf("Writing graminit.h ...\n");
|
||||
printnonterminals(g, fp);
|
||||
fclose(fp);
|
||||
exit(0);
|
||||
}
|
||||
|
||||
void
|
||||
fatal(msg)
|
||||
char *msg;
|
||||
{
|
||||
fprintf(stderr, "pgen: FATAL ERROR: %s\n", msg);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* TO DO:
|
||||
|
||||
- improve user interface
|
||||
- check for duplicate definitions of names (instead of fatal err)
|
||||
*/
|
121
Parser/printgrammar.c
Normal file
121
Parser/printgrammar.c
Normal file
|
@ -0,0 +1,121 @@
|
|||
/* Print a bunch of C initializers that represent a grammar */
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include "PROTO.h"
|
||||
#include "grammar.h"
|
||||
|
||||
static void
|
||||
printarcs(i, d, fp)
|
||||
int i;
|
||||
dfa *d;
|
||||
FILE *fp;
|
||||
{
|
||||
arc *a;
|
||||
state *s;
|
||||
int j, k;
|
||||
|
||||
s = d->d_state;
|
||||
for (j = 0; j < d->d_nstates; j++, s++) {
|
||||
fprintf(fp, "static arc arcs_%d_%d[%d] = {\n",
|
||||
i, j, s->s_narcs);
|
||||
a = s->s_arc;
|
||||
for (k = 0; k < s->s_narcs; k++, a++)
|
||||
fprintf(fp, "\t{%d, %d},\n", a->a_lbl, a->a_arrow);
|
||||
fprintf(fp, "};\n");
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
printstates(g, fp)
|
||||
grammar *g;
|
||||
FILE *fp;
|
||||
{
|
||||
state *s;
|
||||
dfa *d;
|
||||
int i, j;
|
||||
|
||||
d = g->g_dfa;
|
||||
for (i = 0; i < g->g_ndfas; i++, d++) {
|
||||
printarcs(i, d, fp);
|
||||
fprintf(fp, "static state states_%d[%d] = {\n",
|
||||
i, d->d_nstates);
|
||||
s = d->d_state;
|
||||
for (j = 0; j < d->d_nstates; j++, s++)
|
||||
fprintf(fp, "\t{%d, arcs_%d_%d},\n",
|
||||
s->s_narcs, i, j);
|
||||
fprintf(fp, "};\n");
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
printdfas(g, fp)
|
||||
grammar *g;
|
||||
FILE *fp;
|
||||
{
|
||||
dfa *d;
|
||||
int i, j;
|
||||
|
||||
printstates(g, fp);
|
||||
fprintf(fp, "static dfa dfas[%d] = {\n", g->g_ndfas);
|
||||
d = g->g_dfa;
|
||||
for (i = 0; i < g->g_ndfas; i++, d++) {
|
||||
fprintf(fp, "\t{%d, \"%s\", %d, %d, states_%d,\n",
|
||||
d->d_type, d->d_name, d->d_initial, d->d_nstates, i);
|
||||
fprintf(fp, "\t \"");
|
||||
for (j = 0; j < NBYTES(g->g_ll.ll_nlabels); j++)
|
||||
fprintf(fp, "\\%03o", d->d_first[j] & 0xff);
|
||||
fprintf(fp, "\"},\n");
|
||||
}
|
||||
fprintf(fp, "};\n");
|
||||
}
|
||||
|
||||
static void
|
||||
printlabels(g, fp)
|
||||
grammar *g;
|
||||
FILE *fp;
|
||||
{
|
||||
label *l;
|
||||
int i;
|
||||
|
||||
fprintf(fp, "static label labels[%d] = {\n", g->g_ll.ll_nlabels);
|
||||
l = g->g_ll.ll_label;
|
||||
for (i = g->g_ll.ll_nlabels; --i >= 0; l++) {
|
||||
if (l->lb_str == NULL)
|
||||
fprintf(fp, "\t{%d, 0},\n", l->lb_type);
|
||||
else
|
||||
fprintf(fp, "\t{%d, \"%s\"},\n",
|
||||
l->lb_type, l->lb_str);
|
||||
}
|
||||
fprintf(fp, "};\n");
|
||||
}
|
||||
|
||||
void
|
||||
printgrammar(g, fp)
|
||||
grammar *g;
|
||||
FILE *fp;
|
||||
{
|
||||
fprintf(fp, "#include \"PROTO.h\"\n");
|
||||
fprintf(fp, "#include \"grammar.h\"\n");
|
||||
printdfas(g, fp);
|
||||
printlabels(g, fp);
|
||||
fprintf(fp, "grammar gram = {\n");
|
||||
fprintf(fp, "\t%d,\n", g->g_ndfas);
|
||||
fprintf(fp, "\tdfas,\n");
|
||||
fprintf(fp, "\t{%d, labels},\n", g->g_ll.ll_nlabels);
|
||||
fprintf(fp, "\t%d\n", g->g_start);
|
||||
fprintf(fp, "};\n");
|
||||
}
|
||||
|
||||
void
|
||||
printnonterminals(g, fp)
|
||||
grammar *g;
|
||||
FILE *fp;
|
||||
{
|
||||
dfa *d;
|
||||
int i;
|
||||
|
||||
d = g->g_dfa;
|
||||
for (i = g->g_ndfas; --i >= 0; d++)
|
||||
fprintf(fp, "#define %s %d\n", d->d_name, d->d_type);
|
||||
}
|
490
Parser/tokenizer.c
Normal file
490
Parser/tokenizer.c
Normal file
|
@ -0,0 +1,490 @@
|
|||
/* Tokenizer implementation */
|
||||
|
||||
/* XXX This is rather old, should be restructured perhaps */
|
||||
/* XXX Need a better interface to report errors than writing to stderr */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
#include "string.h"
|
||||
|
||||
#include "PROTO.h"
|
||||
#include "malloc.h"
|
||||
#include "tokenizer.h"
|
||||
#include "errcode.h"
|
||||
|
||||
#ifdef THINK_C
|
||||
#define TABSIZE 4
|
||||
#endif
|
||||
|
||||
#ifndef TABSIZE
|
||||
#define TABSIZE 8
|
||||
#endif
|
||||
|
||||
/* Token names */
|
||||
|
||||
char *tok_name[] = {
|
||||
"ENDMARKER",
|
||||
"NAME",
|
||||
"NUMBER",
|
||||
"STRING",
|
||||
"NEWLINE",
|
||||
"INDENT",
|
||||
"DEDENT",
|
||||
"LPAR",
|
||||
"RPAR",
|
||||
"LSQB",
|
||||
"RSQB",
|
||||
"COLON",
|
||||
"COMMA",
|
||||
"SEMI",
|
||||
"PLUS",
|
||||
"MINUS",
|
||||
"STAR",
|
||||
"SLASH",
|
||||
"VBAR",
|
||||
"AMPER",
|
||||
"LESS",
|
||||
"GREATER",
|
||||
"EQUAL",
|
||||
"DOT",
|
||||
"PERCENT",
|
||||
"BACKQUOTE",
|
||||
"LBRACE",
|
||||
"RBRACE",
|
||||
"OP",
|
||||
"<ERRORTOKEN>",
|
||||
"<N_TOKENS>"
|
||||
};
|
||||
|
||||
|
||||
/* Create and initialize a new tok_state structure */
|
||||
|
||||
static struct tok_state *
|
||||
tok_new()
|
||||
{
|
||||
struct tok_state *tok = NEW(struct tok_state, 1);
|
||||
if (tok == NULL)
|
||||
return NULL;
|
||||
tok->buf = tok->cur = tok->end = tok->inp = NULL;
|
||||
tok->done = E_OK;
|
||||
tok->fp = NULL;
|
||||
tok->tabsize = TABSIZE;
|
||||
tok->indent = 0;
|
||||
tok->indstack[0] = 0;
|
||||
tok->atbol = 1;
|
||||
tok->pendin = 0;
|
||||
tok->prompt = tok->nextprompt = NULL;
|
||||
tok->lineno = 0;
|
||||
return tok;
|
||||
}
|
||||
|
||||
|
||||
/* Set up tokenizer for string */
|
||||
|
||||
struct tok_state *
|
||||
tok_setups(str)
|
||||
char *str;
|
||||
{
|
||||
struct tok_state *tok = tok_new();
|
||||
if (tok == NULL)
|
||||
return NULL;
|
||||
tok->buf = tok->cur = str;
|
||||
tok->end = tok->inp = strchr(str, '\0');
|
||||
return tok;
|
||||
}
|
||||
|
||||
|
||||
/* Set up tokenizer for string */
|
||||
|
||||
struct tok_state *
|
||||
tok_setupf(fp, ps1, ps2)
|
||||
FILE *fp;
|
||||
char *ps1, *ps2;
|
||||
{
|
||||
struct tok_state *tok = tok_new();
|
||||
if (tok == NULL)
|
||||
return NULL;
|
||||
if ((tok->buf = NEW(char, BUFSIZ)) == NULL) {
|
||||
DEL(tok);
|
||||
return NULL;
|
||||
}
|
||||
tok->cur = tok->inp = tok->buf;
|
||||
tok->end = tok->buf + BUFSIZ;
|
||||
tok->fp = fp;
|
||||
tok->prompt = ps1;
|
||||
tok->nextprompt = ps2;
|
||||
return tok;
|
||||
}
|
||||
|
||||
|
||||
/* Free a tok_state structure */
|
||||
|
||||
void
|
||||
tok_free(tok)
|
||||
struct tok_state *tok;
|
||||
{
|
||||
/* XXX really need a separate flag to say 'my buffer' */
|
||||
if (tok->fp != NULL && tok->buf != NULL)
|
||||
DEL(tok->buf);
|
||||
DEL(tok);
|
||||
}
|
||||
|
||||
|
||||
/* Get next char, updating state; error code goes into tok->done */
|
||||
|
||||
static int
|
||||
tok_nextc(tok)
|
||||
register struct tok_state *tok;
|
||||
{
|
||||
if (tok->done != E_OK)
|
||||
return EOF;
|
||||
|
||||
for (;;) {
|
||||
if (tok->cur < tok->inp)
|
||||
return *tok->cur++;
|
||||
if (tok->fp == NULL) {
|
||||
tok->done = E_EOF;
|
||||
return EOF;
|
||||
}
|
||||
if (tok->inp > tok->buf && tok->inp[-1] == '\n')
|
||||
tok->inp = tok->buf;
|
||||
if (tok->inp == tok->end) {
|
||||
int n = tok->end - tok->buf;
|
||||
char *new = tok->buf;
|
||||
RESIZE(new, char, n+n);
|
||||
if (new == NULL) {
|
||||
fprintf(stderr, "tokenizer out of mem\n");
|
||||
tok->done = E_NOMEM;
|
||||
return EOF;
|
||||
}
|
||||
tok->buf = new;
|
||||
tok->inp = tok->buf + n;
|
||||
tok->end = tok->inp + n;
|
||||
}
|
||||
#ifdef USE_READLINE
|
||||
if (tok->prompt != NULL) {
|
||||
extern char *readline PROTO((char *prompt));
|
||||
static int been_here;
|
||||
if (!been_here) {
|
||||
/* Force rebind of TAB to insert-tab */
|
||||
extern int rl_insert();
|
||||
rl_bind_key('\t', rl_insert);
|
||||
been_here++;
|
||||
}
|
||||
if (tok->buf != NULL)
|
||||
free(tok->buf);
|
||||
tok->buf = readline(tok->prompt);
|
||||
(void) intrcheck(); /* Clear pending interrupt */
|
||||
if (tok->nextprompt != NULL)
|
||||
tok->prompt = tok->nextprompt;
|
||||
/* XXX different semantics w/o readline()! */
|
||||
if (tok->buf == NULL) {
|
||||
tok->done = E_EOF;
|
||||
}
|
||||
else {
|
||||
unsigned int n = strlen(tok->buf);
|
||||
if (n > 0)
|
||||
add_history(tok->buf);
|
||||
/* Append the '\n' that readline()
|
||||
doesn't give us, for the tokenizer... */
|
||||
tok->buf = realloc(tok->buf, n+2);
|
||||
if (tok->buf == NULL)
|
||||
tok->done = E_NOMEM;
|
||||
else {
|
||||
tok->end = tok->buf + n;
|
||||
*tok->end++ = '\n';
|
||||
*tok->end = '\0';
|
||||
tok->inp = tok->end;
|
||||
tok->cur = tok->buf;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
tok->cur = tok->inp;
|
||||
if (tok->prompt != NULL && tok->inp == tok->buf) {
|
||||
fprintf(stderr, "%s", tok->prompt);
|
||||
tok->prompt = tok->nextprompt;
|
||||
}
|
||||
tok->done = fgets_intr(tok->inp,
|
||||
(int)(tok->end - tok->inp), tok->fp);
|
||||
}
|
||||
if (tok->done != E_OK) {
|
||||
if (tok->prompt != NULL)
|
||||
fprintf(stderr, "\n");
|
||||
return EOF;
|
||||
}
|
||||
tok->inp = strchr(tok->inp, '\0');
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Back-up one character */
|
||||
|
||||
static void
|
||||
tok_backup(tok, c)
|
||||
register struct tok_state *tok;
|
||||
register int c;
|
||||
{
|
||||
if (c != EOF) {
|
||||
if (--tok->cur < tok->buf) {
|
||||
fprintf(stderr, "tok_backup: begin of buffer\n");
|
||||
abort();
|
||||
}
|
||||
if (*tok->cur != c)
|
||||
*tok->cur = c;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Return the token corresponding to a single character */
|
||||
|
||||
int
|
||||
tok_1char(c)
|
||||
int c;
|
||||
{
|
||||
switch (c) {
|
||||
case '(': return LPAR;
|
||||
case ')': return RPAR;
|
||||
case '[': return LSQB;
|
||||
case ']': return RSQB;
|
||||
case ':': return COLON;
|
||||
case ',': return COMMA;
|
||||
case ';': return SEMI;
|
||||
case '+': return PLUS;
|
||||
case '-': return MINUS;
|
||||
case '*': return STAR;
|
||||
case '/': return SLASH;
|
||||
case '|': return VBAR;
|
||||
case '&': return AMPER;
|
||||
case '<': return LESS;
|
||||
case '>': return GREATER;
|
||||
case '=': return EQUAL;
|
||||
case '.': return DOT;
|
||||
case '%': return PERCENT;
|
||||
case '`': return BACKQUOTE;
|
||||
case '{': return LBRACE;
|
||||
case '}': return RBRACE;
|
||||
default: return OP;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Get next token, after space stripping etc. */
|
||||
|
||||
int
|
||||
tok_get(tok, p_start, p_end)
|
||||
register struct tok_state *tok; /* In/out: tokenizer state */
|
||||
char **p_start, **p_end; /* Out: point to start/end of token */
|
||||
{
|
||||
register int c;
|
||||
|
||||
/* Get indentation level */
|
||||
if (tok->atbol) {
|
||||
register int col = 0;
|
||||
tok->atbol = 0;
|
||||
tok->lineno++;
|
||||
for (;;) {
|
||||
c = tok_nextc(tok);
|
||||
if (c == ' ')
|
||||
col++;
|
||||
else if (c == '\t')
|
||||
col = (col/tok->tabsize + 1) * tok->tabsize;
|
||||
else
|
||||
break;
|
||||
}
|
||||
tok_backup(tok, c);
|
||||
if (col == tok->indstack[tok->indent]) {
|
||||
/* No change */
|
||||
}
|
||||
else if (col > tok->indstack[tok->indent]) {
|
||||
/* Indent -- always one */
|
||||
if (tok->indent+1 >= MAXINDENT) {
|
||||
fprintf(stderr, "excessive indent\n");
|
||||
tok->done = E_TOKEN;
|
||||
return ERRORTOKEN;
|
||||
}
|
||||
tok->pendin++;
|
||||
tok->indstack[++tok->indent] = col;
|
||||
}
|
||||
else /* col < tok->indstack[tok->indent] */ {
|
||||
/* Dedent -- any number, must be consistent */
|
||||
while (tok->indent > 0 &&
|
||||
col < tok->indstack[tok->indent]) {
|
||||
tok->indent--;
|
||||
tok->pendin--;
|
||||
}
|
||||
if (col != tok->indstack[tok->indent]) {
|
||||
fprintf(stderr, "inconsistent dedent\n");
|
||||
tok->done = E_TOKEN;
|
||||
return ERRORTOKEN;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
*p_start = *p_end = tok->cur;
|
||||
|
||||
/* Return pending indents/dedents */
|
||||
if (tok->pendin != 0) {
|
||||
if (tok->pendin < 0) {
|
||||
tok->pendin++;
|
||||
return DEDENT;
|
||||
}
|
||||
else {
|
||||
tok->pendin--;
|
||||
return INDENT;
|
||||
}
|
||||
}
|
||||
|
||||
again:
|
||||
/* Skip spaces */
|
||||
do {
|
||||
c = tok_nextc(tok);
|
||||
} while (c == ' ' || c == '\t');
|
||||
|
||||
/* Set start of current token */
|
||||
*p_start = tok->cur - 1;
|
||||
|
||||
/* Skip comment */
|
||||
if (c == '#') {
|
||||
/* Hack to allow overriding the tabsize in the file.
|
||||
This is also recognized by vi, when it occurs near the
|
||||
beginning or end of the file. (Will vi never die...?) */
|
||||
int x;
|
||||
if (sscanf(tok->cur, " vi:set tabsize=%d:", &x) == 1 &&
|
||||
x >= 1 && x <= 40) {
|
||||
fprintf(stderr, "# vi:set tabsize=%d:\n", x);
|
||||
tok->tabsize = x;
|
||||
}
|
||||
do {
|
||||
c = tok_nextc(tok);
|
||||
} while (c != EOF && c != '\n');
|
||||
}
|
||||
|
||||
/* Check for EOF and errors now */
|
||||
if (c == EOF)
|
||||
return tok->done == E_EOF ? ENDMARKER : ERRORTOKEN;
|
||||
|
||||
/* Identifier (most frequent token!) */
|
||||
if (isalpha(c) || c == '_') {
|
||||
do {
|
||||
c = tok_nextc(tok);
|
||||
} while (isalnum(c) || c == '_');
|
||||
tok_backup(tok, c);
|
||||
*p_end = tok->cur;
|
||||
return NAME;
|
||||
}
|
||||
|
||||
/* Newline */
|
||||
if (c == '\n') {
|
||||
tok->atbol = 1;
|
||||
*p_end = tok->cur - 1; /* Leave '\n' out of the string */
|
||||
return NEWLINE;
|
||||
}
|
||||
|
||||
/* Number */
|
||||
if (isdigit(c)) {
|
||||
if (c == '0') {
|
||||
/* Hex or octal */
|
||||
c = tok_nextc(tok);
|
||||
if (c == '.')
|
||||
goto fraction;
|
||||
if (c == 'x' || c == 'X') {
|
||||
/* Hex */
|
||||
do {
|
||||
c = tok_nextc(tok);
|
||||
} while (isxdigit(c));
|
||||
}
|
||||
else {
|
||||
/* Octal; c is first char of it */
|
||||
/* There's no 'isoctdigit' macro, sigh */
|
||||
while ('0' <= c && c < '8') {
|
||||
c = tok_nextc(tok);
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* Decimal */
|
||||
do {
|
||||
c = tok_nextc(tok);
|
||||
} while (isdigit(c));
|
||||
/* Accept floating point numbers.
|
||||
XXX This accepts incomplete things like 12e or 1e+;
|
||||
worry about that at run-time.
|
||||
XXX Doesn't accept numbers starting with a dot */
|
||||
if (c == '.') {
|
||||
fraction:
|
||||
/* Fraction */
|
||||
do {
|
||||
c = tok_nextc(tok);
|
||||
} while (isdigit(c));
|
||||
}
|
||||
if (c == 'e' || c == 'E') {
|
||||
/* Exponent part */
|
||||
c = tok_nextc(tok);
|
||||
if (c == '+' || c == '-')
|
||||
c = tok_nextc(tok);
|
||||
while (isdigit(c)) {
|
||||
c = tok_nextc(tok);
|
||||
}
|
||||
}
|
||||
}
|
||||
tok_backup(tok, c);
|
||||
*p_end = tok->cur;
|
||||
return NUMBER;
|
||||
}
|
||||
|
||||
/* String */
|
||||
if (c == '\'') {
|
||||
for (;;) {
|
||||
c = tok_nextc(tok);
|
||||
if (c == '\n' || c == EOF) {
|
||||
tok->done = E_TOKEN;
|
||||
return ERRORTOKEN;
|
||||
}
|
||||
if (c == '\\') {
|
||||
c = tok_nextc(tok);
|
||||
*p_end = tok->cur;
|
||||
if (c == '\n' || c == EOF) {
|
||||
tok->done = E_TOKEN;
|
||||
return ERRORTOKEN;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (c == '\'')
|
||||
break;
|
||||
}
|
||||
*p_end = tok->cur;
|
||||
return STRING;
|
||||
}
|
||||
|
||||
/* Line continuation */
|
||||
if (c == '\\') {
|
||||
c = tok_nextc(tok);
|
||||
if (c != '\n') {
|
||||
tok->done = E_TOKEN;
|
||||
return ERRORTOKEN;
|
||||
}
|
||||
goto again; /* Read next line */
|
||||
}
|
||||
|
||||
/* Punctuation character */
|
||||
*p_end = tok->cur;
|
||||
return tok_1char(c);
|
||||
}
|
||||
|
||||
|
||||
#ifdef DEBUG
|
||||
|
||||
void
|
||||
tok_dump(type, start, end)
|
||||
int type;
|
||||
char *start, *end;
|
||||
{
|
||||
printf("%s", tok_name[type]);
|
||||
if (type == NAME || type == NUMBER || type == STRING || type == OP)
|
||||
printf("(%.*s)", (int)(end - start), start);
|
||||
}
|
||||
|
||||
#endif
|
29
Parser/tokenizer.h
Normal file
29
Parser/tokenizer.h
Normal file
|
@ -0,0 +1,29 @@
|
|||
/* Tokenizer interface */
|
||||
|
||||
#include "token.h" /* For token types */
|
||||
|
||||
#define MAXINDENT 100 /* Max indentation level */
|
||||
|
||||
/* Tokenizer state */
|
||||
struct tok_state {
|
||||
/* Input state; buf <= cur <= inp <= end */
|
||||
/* NB an entire token must fit in the buffer */
|
||||
char *buf; /* Input buffer */
|
||||
char *cur; /* Next character in buffer */
|
||||
char *inp; /* End of data in buffer */
|
||||
char *end; /* End of input buffer */
|
||||
int done; /* 0 normally, 1 at EOF, -1 after error */
|
||||
FILE *fp; /* Rest of input; NULL if tokenizing a string */
|
||||
int tabsize; /* Tab spacing */
|
||||
int indent; /* Current indentation index */
|
||||
int indstack[MAXINDENT]; /* Stack of indents */
|
||||
int atbol; /* Nonzero if at begin of new line */
|
||||
int pendin; /* Pending indents (if > 0) or dedents (if < 0) */
|
||||
char *prompt, *nextprompt; /* For interactive prompting */
|
||||
int lineno; /* Current line number */
|
||||
};
|
||||
|
||||
extern struct tok_state *tok_setups PROTO((char *));
|
||||
extern struct tok_state *tok_setupf PROTO((FILE *, char *ps1, char *ps2));
|
||||
extern void tok_free PROTO((struct tok_state *));
|
||||
extern int tok_get PROTO((struct tok_state *, char **, char **));
|
Loading…
Add table
Add a link
Reference in a new issue