/* * Copyright (C) 2001 - 2005 Mike Wray * * This library is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of the * License, or (at your option) any later version. This library is * distributed in the hope that it will be useful, but WITHOUT ANY * WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this library; if not, write to the Free Software Foundation, * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifdef __KERNEL__ # include # include # include # include # include #else # include # include #endif #include "sys_net.h" #include "iostream.h" #include "lexis.h" #include "sxpr_parser.h" #include "sys_string.h" #include "enum.h" /** @file * Sxpr parsing. * * So that the parser does not leak memory, all sxprs constructed by * the parser must be freed on error. On successful parse the sxpr * returned becomes the responsibility of the caller. * * @author Mike Wray */ #ifdef DEBUG #define dprintf(fmt, args...) IOStream_print(iostdout, "[DEBUG] %s" fmt, __FUNCTION__, ##args) #else #define dprintf(fmt, args...) do{ }while(0) #endif #undef printf #define printf(fmt, args...) IOStream_print(iostdout, fmt, ##args) static int state_start(Parser *p, char c); static int begin_start(Parser *p, char c); #if 0 /** Print a parse error. * * @param in parser * @param msg format followed by printf arguments */ static void eprintf(Parser *in, char *msg, ...){ va_list args; if(in->error_out){ va_start(args, msg); IOStream_vprint(in->error_out, msg, args); va_end(args); } } /** Print a parse warning. * * @param in parser * @param msg format followed by printf arguments */ static void wprintf(Parser *in, char *msg, ...){ va_list args; if(in->error_out){ va_start(args, msg); IOStream_vprint(in->error_out, msg, args); va_end(args); } } #endif /*============================================================================*/ /** Record defining the message for a parse error. */ typedef struct { ParseErrorId id; char *message; } ParseError; /** Format for printing parse error messages. */ #define PARSE_ERR_FMT "parse error> line %3d, column %2d: %s" /** Message catalog for the parse error codes. */ static ParseError catalog[] = { { PARSE_ERR_UNSPECIFIED, "unspecified error" }, { PARSE_ERR_NOMEM, "out of memory" }, { PARSE_ERR_UNEXPECTED_EOF, "unexpected end of input" }, { PARSE_ERR_TOKEN_TOO_LONG, "token too long" }, { PARSE_ERR_INVALID_SYNTAX, "syntax error" }, { PARSE_ERR_INVALID_ESCAPE, "invalid escape" }, { 0, NULL } }; /** Number of entries in the message catalog. */ const static int catalog_n = sizeof(catalog)/sizeof(ParseError); /** Set the parser error stream. * Parse errors are reported on the the error stream if it is non-null. * * @param z parser * @param error_out error stream */ void Parser_set_error_stream(Parser *z, IOStream *error_out){ z->error_out = error_out; } /** Get the parser error message for an error code. * * @param id error code * @return error message (empty string if the code is unknown) */ static char *get_message(ParseErrorId id){ int i; for(i = 0; i < catalog_n; i++){ if(id == catalog[i].id){ return catalog[i].message; } } return ""; } #if 0 /** Get the line number. * * @param in parser */ static int get_line(Parser *in){ return in->line_no; } /** Get the column number. * * @param in parser */ static int get_column(Parser *in){ return in->char_no; } #endif /** Get the line number the current token started on. * * @param in parser */ static int get_tok_line(Parser *in){ return in->tok_begin_line; } /** Get the column number the current token started on. * * @param in parser */ static int get_tok_column(Parser *in){ return in->tok_begin_char; } /** Return the current token. * The return value points at the internal buffer, so * it must not be modified (or freed). Use copy_token() if you need a copy. * * @param p parser * @return token */ char *peek_token(Parser *p){ return p->tok; } int token_len(Parser *p){ return p->tok_end - p->tok; } /** Return a copy of the current token. * The returned value should be freed when finished with. * * @param p parser * @return copy of token */ char *copy_token(Parser *p){ int n = token_len(p); char *buf = allocate(n + 1); if(buf){ memcpy(buf, peek_token(p), n); buf[n] = '\0'; } return buf; } void new_token(Parser *p){ memset(p->buf, 0, p->buf_end - p->buf); p->tok = p->buf; p->tok_end = p->tok; p->tok_begin_line = p->line_no; p->tok_begin_char = p->char_no; } /** Report a parse error. * Does nothing if the error stream is null or there is no error. * * @param in parser */ static void report_error(Parser *in){ if(in->error_out && in->err){ char *msg = get_message(in->err); char *tok = peek_token(in); IOStream_print(in->error_out, PARSE_ERR_FMT, get_tok_line(in), get_tok_column(in), msg); if(tok && tok[0]){ IOStream_print(in->error_out, " '%s'", tok); } IOStream_print(in->error_out, "\n"); } } /** Get the error message for the current parse error code. * Does nothing if there is no error. * * @param in parser * @param buf where to place the message * @param n maximum number of characters to place in buf * @return current error code (zero for no error) */ int Parser_error_message(Parser *in, char *buf, int n){ if(in->err){ char *msg = get_message(in->err); snprintf(buf, n, PARSE_ERR_FMT, get_tok_line(in), get_tok_column(in), msg); } return in->err; } /** Flag a parse error. All subsequent reads will fail. * Does not change the parser error code if it is already set. * * @param in parser * @param id error code */ int Parser_error_id(Parser *in, ParseErrorId id){ if(!in->err){ in->err = id; report_error(in); } return -EINVAL; } /** Flag an unspecified parse error. * * @param in parser */ int Parser_error(Parser *in){ return Parser_error_id(in, PARSE_ERR_INVALID_SYNTAX); } /** Test if the parser's error flag is set. * * @param in parser * @return 1 if set, 0 otherwise */ int Parser_has_error(Parser *in){ return (in->err > 0); } /** Test if the parser is at end of input. * * @param in parser * @return 1 if at EOF, 0 otherwise */ int Parser_at_eof(Parser *p){ return p->eof; } void ParserState_free(ParserState *z){ if(!z) return; objfree(z->val); deallocate(z); } int ParserState_new(ParserStateFn *fn, char *name, ParserState *parent, ParserState **val){ int err = -ENOMEM; ParserState *z; z = ALLOCATE(ParserState); if(!z) goto exit; z->name = name; z->fn = fn; z->parent = parent; z->val = ONULL; err = 0; exit: *val = (err ? NULL : z); return err; } void Parser_pop(Parser *p){ ParserState *s = p->state; if(!s) return; dprintf("Parser_pop> %s\n", s->name); p->state = s->parent; if (p->start_state == s) { p->start_state = NULL; } ParserState_free(s); } /** Free a parser. * No-op if the parser is null. * * @param z parser */ void Parser_free(Parser *z){ if(!z) return; // Hmmm. Need to free states, but careful about double free of values. while(z->state){ objfree(z->state->val); Parser_pop(z); } if(z->buf) deallocate(z->buf); objfree(z->val); z->val = ONONE; deallocate(z); } int Parser_push(Parser *p, ParserStateFn *fn, char *name){ dprintf("Parser_push> %s\n", name); return ParserState_new(fn, name, p->state, &p->state); } int Parser_return(Parser *p){ int err = 0; Sxpr val = ONONE; if(!p->state){ err = -EINVAL; goto exit; } val = p->state->val; p->state->val = ONONE; Parser_pop(p); if(p->state){ err = cons_push(&p->state->val, val); } else { val = nrev(val); p->val = val; } exit: if(err){ objfree(val); } return err; } /** Reset the fields of a parser to initial values. * * @param z parser */ static void reset(Parser *z){ // leave flags // leave error_out while(z->state){ Parser_pop(z); } z->val = ONONE; z->eof = 0; z->err = 0; z->line_no = 1; z->char_no = 0; memset(z->buf, 0, z->buf_end - z->buf); z->tok = z->buf; z->tok_end = z->tok; z->tok_begin_line = 0; z->tok_begin_char = 0; z->start_state = NULL; } /** Create a new parser. The error stream defaults to null. */ Parser * Parser_new(void){ Parser *z = ALLOCATE(Parser); int n = PARSER_BUF_SIZE; int err = -ENOMEM; if(!z) goto exit; z->buf = allocate(n); if(!z->buf) goto exit; err = 0; z->buf_end = z->buf + n; z->begin = begin_start; reset(z); exit: if(err){ Parser_free(z); z = NULL; } return z; } /** Get the next character. * Records the character read in the parser, * and sets the line and character counts. * * @param p parser * @return error flag: 0 on success, non-zero on error */ static int input_char(Parser *p, char c){ int err = 0; if(c=='\n'){ p->line_no++; p->char_no = 0; } else { p->char_no++; } return err; } int save_char(Parser *p, char c){ int err = 0; if(p->tok_end >= p->buf_end){ int buf_n = (p->buf_end - p->buf) + PARSER_BUF_INCREMENT; char *buf = allocate(buf_n); if(!buf){ err = -ENOMEM; goto exit; } memcpy(buf, p->buf, p->tok_end - p->buf); p->buf_end = buf + buf_n; p->tok = buf + (p->tok - p->buf); p->tok_end = buf + (p->tok_end - p->buf); deallocate(p->buf); p->buf = buf; } *p->tok_end++ = c; exit: return err; } /** Determine if a character is a separator. * * @param p parser * @param c character to test * @return 1 if a separator, 0 otherwise */ static int is_separator(Parser *p, char c){ return in_sep_class(c); } int Parser_set_value(Parser *p, Sxpr obj){ int err = 0; if(NOMEMP(obj)){ err = -ENOMEM; } else { p->state->val = obj; } return err; } int Parser_intern(Parser *p){ Sxpr obj = intern(peek_token(p)); return Parser_set_value(p, obj); } int Parser_atom(Parser *p){ Sxpr obj; long v; if(Parser_flags(p, PARSE_INT) && convert_atol(peek_token(p), &v) == 0){ obj = OINT(v); } else { obj = atom_new(peek_token(p)); } return Parser_set_value(p, obj); } int Parser_string(Parser *p){ Sxpr obj = string_new_n(peek_token(p), token_len(p)); return Parser_set_value(p, obj); } int Parser_data(Parser *p){ Sxpr obj = string_new_n(peek_token(p), token_len(p)); return Parser_set_value(p, obj); } int Parser_uint(Parser *p){ unsigned int x = htonl(*(unsigned int *)peek_token(p)); return Parser_set_value(p, OINT(x)); } static int get_escape(char c, char *d){ int err = 0; switch(c){ case 'a': *d = '\a'; break; case 'b': *d = '\b'; break; case 'f': *d = '\f'; break; case 'n': *d = '\n'; break; case 'r': *d = '\r'; break; case 't': *d = '\t'; break; case 'v': *d = '\v'; break; case c_escape: *d = c_escape; break; case c_single_quote: *d = c_single_quote; break; case c_double_quote: *d = c_double_quote; break; default: err = -EINVAL; } return err; } int Parser_ready(Parser *p){ return CONSP(p->val) || (p->start_state && CONSP(p->start_state->val)); } Sxpr Parser_get_val(Parser *p){ Sxpr v = ONONE, w = ONONE; if(CONSP(p->val)){ } else if (p->start_state && CONSP(p->start_state->val)){ p->val = p->start_state->val; p->val = nrev(p->val); p->start_state->val = ONULL; } else { goto exit; } w = p->val; v = CAR(w); p->val = CDR(w); hfree(w); exit: return v; } Sxpr Parser_get_all(Parser *p){ Sxpr v = ONULL; if(CONSP(p->val)){ v = p->val; p->val = ONONE; } else if(p->start_state && CONSP(p->start_state->val)){ v = p->start_state->val; p->start_state->val = ONULL; v = nrev(v); } return v; } static int state_comment(Parser *p, char c){ int err = 0; if(c == '\n' || Parser_at_eof(p)){ Parser_pop(p); } else { err = input_char(p, c); } return err; } static int begin_comment(Parser *p, char c){ int err = 0; err = Parser_push(p, state_comment, "comment"); if(err) goto exit; err = input_char(p, c); exit: return err; } static int end_string(Parser *p){ int err = 0; err = Parser_string(p); if(err) goto exit; err = Parser_return(p); exit: return err; } static int octaldone(Parser *p){ int err = 0; char d = (char)(p->state->ival & 0xff); Parser_pop(p); err = Parser_input_char(p, d); return err; } static int octaldigit(Parser *p, int d){ int err = 0; p->state->ival *= 8; p->state->ival += d; p->state->count++; if(err) goto exit; if(p->state->ival < 0 || p->state->ival > 0xff){ err = Parser_error(p); goto exit; } if(p->state->count == 3){ err = octaldone(p); } exit: return err; } static int state_octal(Parser *p, char c){ int err = 0; if(Parser_at_eof(p)){ err = Parser_error_id(p, PARSE_ERR_UNEXPECTED_EOF); goto exit; } else if('0' <= c && c <= '7'){ err = octaldigit(p, c - '0'); } else { err = octaldone(p); if(err) goto exit; Parser_input_char(p, c); } exit: return err; } static int hexdone(Parser *p){ int err = 0; char d = (char)(p->state->ival & 0xff); Parser_pop(p); err = Parser_input_char(p, d); return err; } static int hexdigit(Parser *p, int d){ int err = 0; p->state->ival *= 16; p->state->ival += d; p->state->count++; if(err) goto exit; if(p->state->ival < 0 || p->state->ival > 0xff){ err = Parser_error(p); goto exit; } if(p->state->count == 2){ err = hexdone(p); } exit: return err; } static int state_hex(Parser *p, char c){ int err = 0; if(Parser_at_eof(p)){ err = Parser_error_id(p, PARSE_ERR_UNEXPECTED_EOF); goto exit; } else if('0' <= c && c <= '9'){ err = hexdigit(p, c - '0'); } else if('A' <= c && c <= 'F'){ err = hexdigit(p, c - 'A' + 10); } else if('a' <= c && c <= 'f'){ err = hexdigit(p, c - 'a' + 10); } else if(p->state->count){ err = hexdone(p); if(err) goto exit; Parser_input_char(p, c); } exit: return err; } static int state_escape(Parser *p, char c){ int err = 0; char d; if(Parser_at_eof(p)){ err = Parser_error_id(p, PARSE_ERR_UNEXPECTED_EOF); goto exit; } if(get_escape(c, &d) == 0){ err = save_char(p, d); if(err) goto exit; Parser_pop(p); } else if(c == 'x'){ p->state->fn = state_hex; p->state->ival = 0; p->state->count = 0; } else { p->state->fn = state_octal; p->state->ival = 0; p->state->count = 0; err = Parser_input_char(p, c); } exit: return err; } static int state_string(Parser *p, char c){ int err = 0; if(Parser_at_eof(p)){ err = Parser_error_id(p, PARSE_ERR_UNEXPECTED_EOF); } else if(c == p->state->delim){ err = end_string(p); } else if(c == '\\'){ err = Parser_push(p, state_escape, "escape"); } else { err = save_char(p, c); } return err; } static int begin_string(Parser *p, char c){ int err = 0; err = Parser_push(p, state_string, "string"); if(err) goto exit; new_token(p); p->state->delim = c; exit: return err; } static int end_atom(Parser *p){ int err = 0; err = Parser_atom(p); if(err) goto exit; err = Parser_return(p); exit: return err; } static int state_atom(Parser *p, char c){ int err = 0; if(Parser_at_eof(p)){ err = end_atom(p); } else if(is_separator(p, c) || in_space_class(c) || in_comment_class(c)){ err = end_atom(p); if(err) goto exit; err = Parser_input_char(p, c); } else { err = save_char(p, c); } exit: return err; } static int begin_atom(Parser *p, char c){ int err = 0; err = Parser_push(p, state_atom, "atom"); if(err) goto exit; new_token(p); err = save_char(p, c); exit: return err; } static int end_data(Parser *p){ int err = 0; err = Parser_data(p); if(err) goto exit; err = Parser_return(p); exit: return err; } static int counted_data(Parser *p, char c){ int err = 0; err = save_char(p, c); if(err) goto exit; if(token_len(p) == p->state->count){ err = end_data(p); } exit: return err; } static int counted_data_count(Parser *p, char c){ int err = 0; if(c == p->state->delim){ new_token(p); p->state->count = p->state->ival; p->state->fn = counted_data; } else if('0' <= c && c <= '9'){ p->state->ival *= 10; p->state->ival += c - '0'; } else { err = -EINVAL; } return err; } static int quoted_data(Parser *p, char c){ int err = 0; int count = p->state->count; err = save_char(p, c); if(err) goto exit; // Check that buf is longer than delim and // ends with delim. If so, trim delim off and return. if((token_len(p) >= count) && !memcmp(p->tok_end - count, p->buf, count)){ p->tok_end -= count; end_data(p); } exit: return err; } static int quoted_data_delim(Parser *p, char c){ // Saves the delim in the token buffer. int err = 0; err = save_char(p, c); if(err) goto exit; if(c == p->state->delim){ p->state->fn = quoted_data; p->state->count = token_len(p); // Advance the token pointer past the delim. p->tok = p->tok_end; } exit: return err; } static int state_data(Parser *p, char c){ // Quoted data: // <state->delim = c; p->state->fn = counted_data_count; p->state->ival = 0; new_token(p); break; case c_data_quote: p->state->delim = c; p->state->fn = quoted_data_delim; new_token(p); err = save_char(p, c); break; default: err = Parser_error(p); break; } return err; } static int begin_data(Parser *p, char c){ int err = 0; err = Parser_push(p, state_data, "data"); if(err) goto exit; new_token(p); exit: return err; } static int state_list(Parser *p, char c){ int err = 0; dprintf(">\n"); if(Parser_at_eof(p)){ err = Parser_error_id(p, PARSE_ERR_UNEXPECTED_EOF); } else if(c == c_list_close){ p->state->val = nrev(p->state->val); err = Parser_return(p); } else { err = state_start(p, c); } dprintf("< err=%d\n", err); return err; } static int begin_list(Parser *p, char c){ return Parser_push(p, state_list, "list"); } static int state_start(Parser *p, char c){ int err = 0; dprintf(">\n"); if(Parser_at_eof(p)){ err = Parser_return(p); } else if(in_space_class(c)){ //skip } else if(in_comment_class(c)){ begin_comment(p, c); } else if(c == c_list_open){ begin_list(p, c); } else if(c == c_list_close){ err = Parser_error(p); } else if(in_string_quote_class(c)){ begin_string(p, c); } else if(c == c_data_open){ begin_data(p, c); } else if(in_printable_class(c)){ begin_atom(p, c); } else if(c == 0x04){ //ctrl-D, EOT: end-of-text. Parser_input_eof(p); } else { err = Parser_error(p); } dprintf("< err=%d\n", err); return err; } int begin_start(Parser *p, char c){ int err = 0; dprintf(">\n"); err = Parser_push(p, state_start, "start"); if(err) goto exit; p->start_state = p->state; exit: dprintf("< err=%d\n", err); return err; } int Parser_input_char(Parser *p, char c){ int err = 0; if(Parser_at_eof(p)){ //skip; } else { input_char(p, c); } if(!p->state){ err = p->begin(p, c); if(err) goto exit; } err = p->state->fn(p, c); exit: return err; } int Parser_input_eof(Parser *p){ int err = 0; p->eof = 1; err = Parser_input_char(p, IOSTREAM_EOF); return err; } int Parser_input(Parser *p, char *buf, int buf_n){ int err = 0; int i = 0; dprintf("> buf_n=%d\n", buf_n); if(buf_n <= 0){ buf_n = 0; err = Parser_input_eof(p); goto exit; } dprintf("> buf=|%*s|\n", buf_n, buf); for(i = 0; i < buf_n; i++){ err = Parser_input_char(p, buf[i]); if(err) goto exit; } exit: err = (err < 0 ? err : buf_n); dprintf("< err=%d\n", err); return err; } #ifdef SXPR_PARSER_MAIN /* Stuff for standalone testing. */ #include "file_stream.h" //#include "string_stream.h" /** Main program for testing. * Parses input and prints it. * * @param argc number of arguments * @param argv arguments * @return error code */ int main(int argc, char *argv[]){ Parser *pin; int err = 0; char buf[1024]; int k; Sxpr obj; int i = 0; pin = Parser_new(); Parser_set_error_stream(pin, iostdout); dprintf("> parse...\n"); while(1){ k = fread(buf, 1, 100, stdin); if(k>=0){ buf[k+1] = '\0'; } err = Parser_input(pin, buf, k); while(Parser_ready(pin)){ obj = Parser_get_val(pin); printf("obj %d\n", i++); objprint(iostdout, obj, 0); printf("\n"); } if(k <= 0) break; } dprintf("> err=%d\n", err); return 0; } #endif