diff options
author | David M. Lee <dlee@digium.com> | 2013-01-07 14:24:28 -0600 |
---|---|---|
committer | David M. Lee <dlee@digium.com> | 2013-01-07 14:24:28 -0600 |
commit | f3ab456a17af1c89a6e3be4d20c5944853df1cb0 (patch) | |
tree | d00e1a332cd038a6d906a1ea0ac91e1a4458e617 /pjlib-util/src/pjlib-util/scanner.c |
Import pjproject-2.0.1
Diffstat (limited to 'pjlib-util/src/pjlib-util/scanner.c')
-rw-r--r-- | pjlib-util/src/pjlib-util/scanner.c | 636 |
1 files changed, 636 insertions, 0 deletions
diff --git a/pjlib-util/src/pjlib-util/scanner.c b/pjlib-util/src/pjlib-util/scanner.c new file mode 100644 index 0000000..d8e1c8e --- /dev/null +++ b/pjlib-util/src/pjlib-util/scanner.c @@ -0,0 +1,636 @@ +/* $Id: scanner.c 3553 2011-05-05 06:14:19Z nanang $ */ +/* + * Copyright (C) 2008-2011 Teluu Inc. (http://www.teluu.com) + * Copyright (C) 2003-2008 Benny Prijono <benny@prijono.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include <pjlib-util/scanner.h> +#include <pj/ctype.h> +#include <pj/string.h> +#include <pj/except.h> +#include <pj/os.h> +#include <pj/errno.h> +#include <pj/assert.h> + +#define PJ_SCAN_IS_SPACE(c) ((c)==' ' || (c)=='\t') +#define PJ_SCAN_IS_NEWLINE(c) ((c)=='\r' || (c)=='\n') +#define PJ_SCAN_IS_PROBABLY_SPACE(c) ((c) <= 32) +#define PJ_SCAN_CHECK_EOF(s) (s != scanner->end) + + +#if defined(PJ_SCANNER_USE_BITWISE) && PJ_SCANNER_USE_BITWISE != 0 +# include "scanner_cis_bitwise.c" +#else +# include "scanner_cis_uint.c" +#endif + + +static void pj_scan_syntax_err(pj_scanner *scanner) +{ + (*scanner->callback)(scanner); +} + + +PJ_DEF(void) pj_cis_add_range(pj_cis_t *cis, int cstart, int cend) +{ + /* Can not set zero. This is the requirement of the parser. */ + pj_assert(cstart > 0); + + while (cstart != cend) { + PJ_CIS_SET(cis, cstart); + ++cstart; + } +} + +PJ_DEF(void) pj_cis_add_alpha(pj_cis_t *cis) +{ + pj_cis_add_range( cis, 'a', 'z'+1); + pj_cis_add_range( cis, 'A', 'Z'+1); +} + +PJ_DEF(void) pj_cis_add_num(pj_cis_t *cis) +{ + pj_cis_add_range( cis, '0', '9'+1); +} + +PJ_DEF(void) pj_cis_add_str( pj_cis_t *cis, const char *str) +{ + while (*str) { + PJ_CIS_SET(cis, *str); + ++str; + } +} + +PJ_DEF(void) pj_cis_add_cis( pj_cis_t *cis, const pj_cis_t *rhs) +{ + int i; + for (i=0; i<256; ++i) { + if (PJ_CIS_ISSET(rhs, i)) + PJ_CIS_SET(cis, i); + } +} + +PJ_DEF(void) pj_cis_del_range( pj_cis_t *cis, int cstart, int cend) +{ + while (cstart != cend) { + PJ_CIS_CLR(cis, cstart); + cstart++; + } +} + +PJ_DEF(void) pj_cis_del_str( pj_cis_t *cis, const char *str) +{ + while (*str) { + PJ_CIS_CLR(cis, *str); + ++str; + } +} + +PJ_DEF(void) pj_cis_invert( pj_cis_t *cis ) +{ + unsigned i; + /* Can not set zero. This is the requirement of the parser. */ + for (i=1; i<256; ++i) { + if (PJ_CIS_ISSET(cis,i)) + PJ_CIS_CLR(cis,i); + else + PJ_CIS_SET(cis,i); + } +} + +PJ_DEF(void) pj_scan_init( pj_scanner *scanner, char *bufstart, int buflen, + unsigned options, pj_syn_err_func_ptr callback ) +{ + PJ_CHECK_STACK(); + + scanner->begin = scanner->curptr = bufstart; + scanner->end = bufstart + buflen; + scanner->line = 1; + scanner->start_line = scanner->begin; + scanner->callback = callback; + scanner->skip_ws = options; + + if (scanner->skip_ws) + pj_scan_skip_whitespace(scanner); +} + + +PJ_DEF(void) pj_scan_fini( pj_scanner *scanner ) +{ + PJ_CHECK_STACK(); + PJ_UNUSED_ARG(scanner); +} + +PJ_DEF(void) pj_scan_skip_whitespace( pj_scanner *scanner ) +{ + register char *s = scanner->curptr; + + while (PJ_SCAN_IS_SPACE(*s)) { + ++s; + } + + if (PJ_SCAN_IS_NEWLINE(*s) && (scanner->skip_ws & PJ_SCAN_AUTOSKIP_NEWLINE)) { + for (;;) { + if (*s == '\r') { + ++s; + if (*s == '\n') ++s; + ++scanner->line; + scanner->curptr = scanner->start_line = s; + } else if (*s == '\n') { + ++s; + ++scanner->line; + scanner->curptr = scanner->start_line = s; + } else if (PJ_SCAN_IS_SPACE(*s)) { + do { + ++s; + } while (PJ_SCAN_IS_SPACE(*s)); + } else { + break; + } + } + } + + if (PJ_SCAN_IS_NEWLINE(*s) && (scanner->skip_ws & PJ_SCAN_AUTOSKIP_WS_HEADER)==PJ_SCAN_AUTOSKIP_WS_HEADER) { + /* Check for header continuation. */ + scanner->curptr = s; + + if (*s == '\r') { + ++s; + } + if (*s == '\n') { + ++s; + } + scanner->start_line = s; + + if (PJ_SCAN_IS_SPACE(*s)) { + register char *t = s; + do { + ++t; + } while (PJ_SCAN_IS_SPACE(*t)); + + ++scanner->line; + scanner->curptr = t; + } + } else { + scanner->curptr = s; + } +} + +PJ_DEF(void) pj_scan_skip_line( pj_scanner *scanner ) +{ + char *s = pj_ansi_strchr(scanner->curptr, '\n'); + if (!s) { + scanner->curptr = scanner->end; + } else { + scanner->curptr = scanner->start_line = s+1; + scanner->line++; + } +} + +PJ_DEF(int) pj_scan_peek( pj_scanner *scanner, + const pj_cis_t *spec, pj_str_t *out) +{ + register char *s = scanner->curptr; + + if (s >= scanner->end) { + pj_scan_syntax_err(scanner); + return -1; + } + + /* Don't need to check EOF with PJ_SCAN_CHECK_EOF(s) */ + while (pj_cis_match(spec, *s)) + ++s; + + pj_strset3(out, scanner->curptr, s); + return *s; +} + + +PJ_DEF(int) pj_scan_peek_n( pj_scanner *scanner, + pj_size_t len, pj_str_t *out) +{ + char *endpos = scanner->curptr + len; + + if (endpos > scanner->end) { + pj_scan_syntax_err(scanner); + return -1; + } + + pj_strset(out, scanner->curptr, len); + return *endpos; +} + + +PJ_DEF(int) pj_scan_peek_until( pj_scanner *scanner, + const pj_cis_t *spec, + pj_str_t *out) +{ + register char *s = scanner->curptr; + + if (s >= scanner->end) { + pj_scan_syntax_err(scanner); + return -1; + } + + while (PJ_SCAN_CHECK_EOF(s) && !pj_cis_match( spec, *s)) + ++s; + + pj_strset3(out, scanner->curptr, s); + return *s; +} + + +PJ_DEF(void) pj_scan_get( pj_scanner *scanner, + const pj_cis_t *spec, pj_str_t *out) +{ + register char *s = scanner->curptr; + + pj_assert(pj_cis_match(spec,0)==0); + + /* EOF is detected implicitly */ + if (!pj_cis_match(spec, *s)) { + pj_scan_syntax_err(scanner); + return; + } + + do { + ++s; + } while (pj_cis_match(spec, *s)); + /* No need to check EOF here (PJ_SCAN_CHECK_EOF(s)) because + * buffer is NULL terminated and pj_cis_match(spec,0) should be + * false. + */ + + pj_strset3(out, scanner->curptr, s); + + scanner->curptr = s; + + if (PJ_SCAN_IS_PROBABLY_SPACE(*s) && scanner->skip_ws) { + pj_scan_skip_whitespace(scanner); + } +} + + +PJ_DEF(void) pj_scan_get_unescape( pj_scanner *scanner, + const pj_cis_t *spec, pj_str_t *out) +{ + register char *s = scanner->curptr; + char *dst = s; + + pj_assert(pj_cis_match(spec,0)==0); + + /* Must not match character '%' */ + pj_assert(pj_cis_match(spec,'%')==0); + + /* EOF is detected implicitly */ + if (!pj_cis_match(spec, *s) && *s != '%') { + pj_scan_syntax_err(scanner); + return; + } + + out->ptr = s; + do { + if (*s == '%') { + if (s+3 <= scanner->end && pj_isxdigit(*(s+1)) && + pj_isxdigit(*(s+2))) + { + *dst = (pj_uint8_t) ((pj_hex_digit_to_val(*(s+1)) << 4) + + pj_hex_digit_to_val(*(s+2))); + ++dst; + s += 3; + } else { + *dst++ = *s++; + *dst++ = *s++; + break; + } + } + + if (pj_cis_match(spec, *s)) { + char *start = s; + do { + ++s; + } while (pj_cis_match(spec, *s)); + + if (dst != start) pj_memmove(dst, start, s-start); + dst += (s-start); + } + + } while (*s == '%'); + + scanner->curptr = s; + out->slen = (dst - out->ptr); + + if (PJ_SCAN_IS_PROBABLY_SPACE(*s) && scanner->skip_ws) { + pj_scan_skip_whitespace(scanner); + } +} + + +PJ_DEF(void) pj_scan_get_quote( pj_scanner *scanner, + int begin_quote, int end_quote, + pj_str_t *out) +{ + char beg = (char)begin_quote; + char end = (char)end_quote; + pj_scan_get_quotes(scanner, &beg, &end, 1, out); +} + +PJ_DEF(void) pj_scan_get_quotes(pj_scanner *scanner, + const char *begin_quote, const char *end_quote, + int qsize, pj_str_t *out) +{ + register char *s = scanner->curptr; + int qpair = -1; + int i; + + pj_assert(qsize > 0); + + /* Check and eat the begin_quote. */ + for (i = 0; i < qsize; ++i) { + if (*s == begin_quote[i]) { + qpair = i; + break; + } + } + if (qpair == -1) { + pj_scan_syntax_err(scanner); + return; + } + ++s; + + /* Loop until end_quote is found. + */ + do { + /* loop until end_quote is found. */ + while (PJ_SCAN_CHECK_EOF(s) && *s != '\n' && *s != end_quote[qpair]) { + ++s; + } + + /* check that no backslash character precedes the end_quote. */ + if (*s == end_quote[qpair]) { + if (*(s-1) == '\\') { + if (s-2 == scanner->begin) { + break; + } else { + char *q = s-2; + char *r = s-2; + + while (r != scanner->begin && *r == '\\') { + --r; + } + /* break from main loop if we have odd number of backslashes */ + if (((unsigned)(q-r) & 0x01) == 1) { + ++s; + break; + } + ++s; + } + } else { + /* end_quote is not preceeded by backslash. break now. */ + break; + } + } else { + /* loop ended by non-end_quote character. break now. */ + break; + } + } while (1); + + /* Check and eat the end quote. */ + if (*s != end_quote[qpair]) { + pj_scan_syntax_err(scanner); + return; + } + ++s; + + pj_strset3(out, scanner->curptr, s); + + scanner->curptr = s; + + if (PJ_SCAN_IS_PROBABLY_SPACE(*s) && scanner->skip_ws) { + pj_scan_skip_whitespace(scanner); + } +} + + +PJ_DEF(void) pj_scan_get_n( pj_scanner *scanner, + unsigned N, pj_str_t *out) +{ + if (scanner->curptr + N > scanner->end) { + pj_scan_syntax_err(scanner); + return; + } + + pj_strset(out, scanner->curptr, N); + + scanner->curptr += N; + + if (PJ_SCAN_IS_PROBABLY_SPACE(*scanner->curptr) && scanner->skip_ws) { + pj_scan_skip_whitespace(scanner); + } +} + + +PJ_DEF(int) pj_scan_get_char( pj_scanner *scanner ) +{ + int chr = *scanner->curptr; + + if (!chr) { + pj_scan_syntax_err(scanner); + return 0; + } + + ++scanner->curptr; + + if (PJ_SCAN_IS_PROBABLY_SPACE(*scanner->curptr) && scanner->skip_ws) { + pj_scan_skip_whitespace(scanner); + } + return chr; +} + + +PJ_DEF(void) pj_scan_get_newline( pj_scanner *scanner ) +{ + if (!PJ_SCAN_IS_NEWLINE(*scanner->curptr)) { + pj_scan_syntax_err(scanner); + return; + } + + if (*scanner->curptr == '\r') { + ++scanner->curptr; + } + if (*scanner->curptr == '\n') { + ++scanner->curptr; + } + + ++scanner->line; + scanner->start_line = scanner->curptr; + + /** + * This probably is a bug, see PROTOS test #2480. + * This would cause scanner to incorrectly eat two new lines, e.g. + * when parsing: + * + * Content-Length: 120\r\n + * \r\n + * <space><space><space>... + * + * When pj_scan_get_newline() is called to parse the first newline + * in the Content-Length header, it will eat the second newline + * too because it thinks that it's a header continuation. + * + * if (PJ_SCAN_IS_PROBABLY_SPACE(*scanner->curptr) && scanner->skip_ws) { + * pj_scan_skip_whitespace(scanner); + * } + */ +} + + +PJ_DEF(void) pj_scan_get_until( pj_scanner *scanner, + const pj_cis_t *spec, pj_str_t *out) +{ + register char *s = scanner->curptr; + + if (s >= scanner->end) { + pj_scan_syntax_err(scanner); + return; + } + + while (PJ_SCAN_CHECK_EOF(s) && !pj_cis_match(spec, *s)) { + ++s; + } + + pj_strset3(out, scanner->curptr, s); + + scanner->curptr = s; + + if (PJ_SCAN_IS_PROBABLY_SPACE(*s) && scanner->skip_ws) { + pj_scan_skip_whitespace(scanner); + } +} + + +PJ_DEF(void) pj_scan_get_until_ch( pj_scanner *scanner, + int until_char, pj_str_t *out) +{ + register char *s = scanner->curptr; + + if (s >= scanner->end) { + pj_scan_syntax_err(scanner); + return; + } + + while (PJ_SCAN_CHECK_EOF(s) && *s != until_char) { + ++s; + } + + pj_strset3(out, scanner->curptr, s); + + scanner->curptr = s; + + if (PJ_SCAN_IS_PROBABLY_SPACE(*s) && scanner->skip_ws) { + pj_scan_skip_whitespace(scanner); + } +} + + +PJ_DEF(void) pj_scan_get_until_chr( pj_scanner *scanner, + const char *until_spec, pj_str_t *out) +{ + register char *s = scanner->curptr; + int speclen; + + if (s >= scanner->end) { + pj_scan_syntax_err(scanner); + return; + } + + speclen = strlen(until_spec); + while (PJ_SCAN_CHECK_EOF(s) && !memchr(until_spec, *s, speclen)) { + ++s; + } + + pj_strset3(out, scanner->curptr, s); + + scanner->curptr = s; + + if (PJ_SCAN_IS_PROBABLY_SPACE(*s) && scanner->skip_ws) { + pj_scan_skip_whitespace(scanner); + } +} + +PJ_DEF(void) pj_scan_advance_n( pj_scanner *scanner, + unsigned N, pj_bool_t skip_ws) +{ + if (scanner->curptr + N > scanner->end) { + pj_scan_syntax_err(scanner); + return; + } + + scanner->curptr += N; + + if (PJ_SCAN_IS_PROBABLY_SPACE(*scanner->curptr) && skip_ws) { + pj_scan_skip_whitespace(scanner); + } +} + + +PJ_DEF(int) pj_scan_strcmp( pj_scanner *scanner, const char *s, int len) +{ + if (scanner->curptr + len > scanner->end) { + pj_scan_syntax_err(scanner); + return -1; + } + return strncmp(scanner->curptr, s, len); +} + + +PJ_DEF(int) pj_scan_stricmp( pj_scanner *scanner, const char *s, int len) +{ + if (scanner->curptr + len > scanner->end) { + pj_scan_syntax_err(scanner); + return -1; + } + return pj_ansi_strnicmp(scanner->curptr, s, len); +} + +PJ_DEF(int) pj_scan_stricmp_alnum( pj_scanner *scanner, const char *s, + int len) +{ + if (scanner->curptr + len > scanner->end) { + pj_scan_syntax_err(scanner); + return -1; + } + return strnicmp_alnum(scanner->curptr, s, len); +} + +PJ_DEF(void) pj_scan_save_state( const pj_scanner *scanner, + pj_scan_state *state) +{ + state->curptr = scanner->curptr; + state->line = scanner->line; + state->start_line = scanner->start_line; +} + + +PJ_DEF(void) pj_scan_restore_state( pj_scanner *scanner, + pj_scan_state *state) +{ + scanner->curptr = state->curptr; + scanner->line = state->line; + scanner->start_line = state->start_line; +} + + |