diff options
Diffstat (limited to 'pjlib-util')
-rw-r--r-- | pjlib-util/include/pjlib-util/scanner.h | 235 | ||||
-rw-r--r-- | pjlib-util/src/pjlib-util/scanner.c | 121 |
2 files changed, 222 insertions, 134 deletions
diff --git a/pjlib-util/include/pjlib-util/scanner.h b/pjlib-util/include/pjlib-util/scanner.h index f1b0b133..711eeed7 100644 --- a/pjlib-util/include/pjlib-util/scanner.h +++ b/pjlib-util/include/pjlib-util/scanner.h @@ -1,9 +1,7 @@ /* $Id$ - * */ - -#ifndef __PJ_PARSER_H__ -#define __PJ_PARSER_H__ +#ifndef __PJ_SCANNER_H__ +#define __PJ_SCANNER_H__ /** * @file scanner.h @@ -18,115 +16,175 @@ PJ_BEGIN_DECL * @defgroup PJ_SCAN Text Scanning * @ingroup PJ_MISC * @brief - * Text scanning utility. - */ - -/** - * @defgroup PJ_CHARSPEC Character Filter Specification - * @ingroup PJ_SCAN - * @brief - * The type pj_char_spec is a specification of character set used in - * scanner. Application can define multiple character specs, such as to - * scan alpha numerics, numbers, tokens, etc. + * Text scanning utility.
+ *
* @{ */ +/**
+ * The underlying integer type of each element in #pj_cis_buf_t.
+ * Basically, the number of bits in this type is the number of character
+ * input specifications that can share one buffer.
+ */
+#ifndef PJ_CIS_ELEM_TYPE
+# define PJ_CIS_ELEM_TYPE pj_uint32_t
+#endif
+ /** * This describes the type of individual character specification in - * #pj_char_spec. + * #pj_cis_buf_t. */ -typedef pj_uint8_t pj_char_spec_element_t; +typedef PJ_CIS_ELEM_TYPE pj_cis_elem_t; +
+/**
+ * Maximum number of input specifications in a buffer.
+ * Effectively this means the number of bits in pj_cis_elem_t
+ * (the size in bytes times eight, i.e. 8-bit bytes are assumed).
+ */
+#define PJ_CIS_MAX_INDEX (sizeof(pj_cis_elem_t) << 3)
/** - * The character specification is implemented as array of boolean flags. Each - * flag indicates the membership of the character in the spec. If the flag - * at one position is non-zero, then the character at that position belongs - * to the specification, and vice versa. - */ -typedef pj_char_spec_element_t pj_char_spec[256]; -// Note: it's got to be 256 (not 128) to cater for extended character in input. + * The scanner input specification buffer. + */ +typedef struct pj_cis_buf_t
+{
+ pj_cis_elem_t cis_buf[256]; /**< Must be 256 (not 128)! */
+ pj_cis_elem_t use_mask; /**< To keep used indexes. */
+} pj_cis_buf_t;
+ +/**
+ * Character input specification.
+ */
+typedef struct pj_cis_t
+{
+ pj_cis_elem_t *cis_buf; /**< Pointer to the element array of the buffer this specification lives in. */
+ int cis_id; /**< Bit position, within each element, that holds this specification's membership flag. */
+} pj_cis_t;
/** - * Initialize character spec. - * @param cs the scanner character specification. - */ -PJ_DECL(void) pj_cs_init( pj_char_spec cs); + * Initialize scanner input specification buffer.
+ * + * @param cs_buf The scanner character specification. + */ +PJ_DECL(void) pj_cis_buf_init(pj_cis_buf_t *cs_buf); +
+/**
+ * Create a new input specification inside a specification buffer.
+ *
+ * @param cs_buf Specification buffer that will hold the new specification.
+ * @param cis Character input specification to be initialized.
+ *
+ * @return PJ_SUCCESS if the new specification has been successfully
+ * created, or PJ_ETOOMANY if there are already too many
+ * specifications in the buffer.
+ */
+PJ_DECL(pj_status_t) pj_cis_init(pj_cis_buf_t *cs_buf, pj_cis_t *cis);
+
+/**
+ * Create a new input specification based on an existing specification.
+ * The new specification is created in the same buffer as the existing one.
+ *
+ * @param new_cis The new specification to be initialized.
+ * @param existing The existing specification, from which the input
+ * bitmask will be copied to the new specification.
+ *
+ * @return PJ_SUCCESS if the new specification has been successfully
+ * created, or PJ_ETOOMANY if there are already too many
+ * specifications in the buffer.
+ */
+PJ_DECL(pj_status_t) pj_cis_dup(pj_cis_t *new_cis, pj_cis_t *existing);
/** - * Set the membership of the specified character to TRUE. - * @param cs the scanner character specification. - * @param c the character. - */ -PJ_DECL(void) pj_cs_set( pj_char_spec cs, int c); + * Set the membership of the specified character.
+ * Note that this is a macro, and arguments may be evaluated more than once.
+ *
+ * @param cis    Pointer to character input specification.
+ * @param c      The character. It is converted to unsigned char before
+ *               indexing, so a negative (high-bit) plain char value
+ *               selects the upper half of the 256-entry table instead
+ *               of an element in front of it.
+ */
+#define PJ_CIS_SET(cis,c)   ((cis)->cis_buf[(unsigned char)(c)] |= \
+                             ((pj_cis_elem_t)1 << (cis)->cis_id))
+
+/**
+ * Remove the membership of the specified character.
+ * Note that this is a macro, and arguments may be evaluated more than once.
+ *
+ * @param cis    Pointer to character input specification.
+ * @param c      The character to be removed from the membership. It is
+ *               converted to unsigned char before indexing, so a negative
+ *               (high-bit) plain char value selects the upper half of the
+ *               256-entry table instead of an element in front of it.
+ */
+#define PJ_CIS_CLR(cis,c)   ((cis)->cis_buf[(unsigned char)(c)] &= \
+                             ~((pj_cis_elem_t)1 << (cis)->cis_id))
+
+/**
+ * Check the membership of the specified character.
+ * Note that this is a macro, and arguments may be evaluated more than once.
+ *
+ * @param cis    Pointer to character input specification.
+ * @param c      The character. It is converted to unsigned char before
+ *               indexing, so a negative (high-bit) plain char value
+ *               selects the upper half of the 256-entry table instead
+ *               of an element in front of it.
+ *
+ * @return       Non-zero if the character belongs to the specification,
+ *               zero otherwise.
+ */
+#define PJ_CIS_ISSET(cis,c) ((cis)->cis_buf[(unsigned char)(c)] & \
+                             ((pj_cis_elem_t)1 << (cis)->cis_id))
/** * Add the characters in the specified range '[cstart, cend)' to the - * specification (the last character itself ('cend') is not added). - * @param cs the scanner character specification. - * @param cstart the first character in the range. - * @param cend the next character after the last character in the range. + * specification (the last character itself ('cend') is not added).
+ * + * @param cis The scanner character specification. + * @param cstart The first character in the range. + * @param cend The next character after the last character in the range. */ -PJ_DECL(void) pj_cs_add_range( pj_char_spec cs, int cstart, int cend); +PJ_DECL(void) pj_cis_add_range( pj_cis_t *cis, int cstart, int cend); /** - * Add alphabetic characters to the specification. - * @param cs the scanner character specification. + * Add alphabetic characters to the specification.
+ * + * @param cis The scanner character specification. */ -PJ_DECL(void) pj_cs_add_alpha( pj_char_spec cs); +PJ_DECL(void) pj_cis_add_alpha( pj_cis_t *cis); /** - * Add numeric characters to the specification. - * @param cs the scanner character specification. + * Add numeric characters to the specification.
+ * + * @param cis The scanner character specification. */ -PJ_DECL(void) pj_cs_add_num( pj_char_spec cs); +PJ_DECL(void) pj_cis_add_num( pj_cis_t *cis); /** - * Add the characters in the string to the specification. - * @param cs the scanner character specification. - * @param str the string. + * Add the characters in the string to the specification.
+ * + * @param cis The scanner character specification. + * @param str The string. */ -PJ_DECL(void) pj_cs_add_str( pj_char_spec cs, const char *str); +PJ_DECL(void) pj_cis_add_str( pj_cis_t *cis, const char *str); /** - * Delete characters in the specified range from the specification. - * @param cs the scanner character specification. - * @param cstart the first character in the range. - * @param cend the next character after the last character in the range. + * Delete characters in the specified range from the specification.
+ * + * @param cis The scanner character specification. + * @param cstart The first character in the range. + * @param cend The next character after the last character in the range. */ -PJ_DECL(void) pj_cs_del_range( pj_char_spec cs, int cstart, int cend); +PJ_DECL(void) pj_cis_del_range( pj_cis_t *cis, int cstart, int cend); /** - * Delete characters in the specified string from the specification. - * @param cs the scanner character specification. - * @param str the string. + * Delete characters in the specified string from the specification.
+ * + * @param cis The scanner character specification. + * @param str The string. */ -PJ_DECL(void) pj_cs_del_str( pj_char_spec cs, const char *str); +PJ_DECL(void) pj_cis_del_str( pj_cis_t *cis, const char *str); /** - * Invert specification. - * @param cs the scanner character specification. + * Invert specification.
+ * + * @param cis The scanner character specification. */ -PJ_DECL(void) pj_cs_invert( pj_char_spec cs ); +PJ_DECL(void) pj_cis_invert( pj_cis_t *cis ); /** - * Check whether the specified character belongs to the specification. - * @param cs the scanner character specification. - * @param c the character to check for matching. + * Check whether the specified character belongs to the specification.
+ * + * @param cis The scanner character specification. + * @param c The character to check for matching. */ -PJ_INLINE(int) pj_cs_match( const pj_char_spec cs, int c ) +PJ_INLINE(int) pj_cis_match( const pj_cis_t *cis, int c ) { - return cs[c]; + return PJ_CIS_ISSET(cis, c); } -/** - * @} - */ - -/** - * @defgroup PJ_SCANNER Text Scanner - * @ingroup PJ_SCAN - * @{ - */ /** * Flags for scanner. @@ -155,8 +213,9 @@ struct pj_scanner; /** * The callback function type to be called by the scanner when it encounters - * syntax error. - * @param scanner The scanner instance that calls the callback . + * syntax error.
+ * + * @param scanner The scanner instance that calls the callback . */ typedef void (*pj_syn_err_func_ptr)(struct pj_scanner *scanner); @@ -244,7 +303,7 @@ PJ_INLINE(int) pj_scan_is_eof( const pj_scanner *scanner) * no more characters. */ PJ_DECL(int) pj_scan_peek( pj_scanner *scanner, - const pj_char_spec spec, pj_str_t *out); + const pj_cis_t *spec, pj_str_t *out); /** @@ -261,7 +320,7 @@ PJ_DECL(int) pj_scan_peek( pj_scanner *scanner, * no more characters. */ PJ_DECL(int) pj_scan_peek_n( pj_scanner *scanner, - pj_size_t len, pj_str_t *out); + pj_size_t len, pj_str_t *out); /** @@ -277,8 +336,8 @@ PJ_DECL(int) pj_scan_peek_n( pj_scanner *scanner, * @return the character right after the peek-ed position. */ PJ_DECL(int) pj_scan_peek_until( pj_scanner *scanner, - const pj_char_spec spec, - pj_str_t *out); + const pj_cis_t *spec, + pj_str_t *out); /** @@ -293,7 +352,7 @@ PJ_DECL(int) pj_scan_peek_until( pj_scanner *scanner, * @param out String to store the result. */ PJ_DECL(void) pj_scan_get( pj_scanner *scanner, - const pj_char_spec spec, pj_str_t *out); + const pj_cis_t *spec, pj_str_t *out); /** @@ -317,7 +376,7 @@ PJ_DECL(void) pj_scan_get_quote( pj_scanner *scanner, * @param out String to store the result. */ PJ_DECL(void) pj_scan_get_n( pj_scanner *scanner, - unsigned N, pj_str_t *out); + unsigned N, pj_str_t *out); /** @@ -325,7 +384,7 @@ PJ_DECL(void) pj_scan_get_n( pj_scanner *scanner, * * @param scanner The scanner. * - * @return (unknown) + * @return The character. */ PJ_DECL(int) pj_scan_get_char( pj_scanner *scanner ); @@ -348,7 +407,7 @@ PJ_DECL(void) pj_scan_get_newline( pj_scanner *scanner ); * @param out String to store the result. */ PJ_DECL(void) pj_scan_get_until( pj_scanner *scanner, - const pj_char_spec spec, pj_str_t *out); + const pj_cis_t *spec, pj_str_t *out); /** @@ -360,7 +419,7 @@ PJ_DECL(void) pj_scan_get_until( pj_scanner *scanner, * @param out String to store the result. 
*/ PJ_DECL(void) pj_scan_get_until_ch( pj_scanner *scanner, - int until_char, pj_str_t *out); + int until_char, pj_str_t *out); /** @@ -372,7 +431,7 @@ PJ_DECL(void) pj_scan_get_until_ch( pj_scanner *scanner, * @param out String to store the result. */ PJ_DECL(void) pj_scan_get_until_chr( pj_scanner *scanner, - const char *until_spec, pj_str_t *out); + const char *until_spec, pj_str_t *out); /** * Advance the scanner N characters, and skip whitespace @@ -384,7 +443,7 @@ PJ_DECL(void) pj_scan_get_until_chr( pj_scanner *scanner, * after skipping the characters. */ PJ_DECL(void) pj_scan_advance_n( pj_scanner *scanner, - unsigned N, pj_bool_t skip); + unsigned N, pj_bool_t skip); /** @@ -445,10 +504,6 @@ PJ_DECL(void) pj_scan_restore_state( pj_scanner *scanner, * @} */ -#if PJ_FUNCTIONS_ARE_INLINED -# include "scanner_i.h" -#endif - PJ_END_DECL diff --git a/pjlib-util/src/pjlib-util/scanner.c b/pjlib-util/src/pjlib-util/scanner.c index 65e3f351..0e753dfb 100644 --- a/pjlib-util/src/pjlib-util/scanner.c +++ b/pjlib-util/src/pjlib-util/scanner.c @@ -4,6 +4,7 @@ #include <pj/string.h> #include <pj/except.h> #include <pj/os.h> +#include <pj/errno.h>
#define PJ_SCAN_IS_SPACE(c) ((c)==' ' || (c)=='\t') #define PJ_SCAN_IS_NEWLINE(c) ((c)=='\r' || (c)=='\n') @@ -15,67 +16,99 @@ static void pj_scan_syntax_err(pj_scanner *scanner) (*scanner->callback)(scanner); } -PJ_DEF(void) pj_cs_init( pj_char_spec cs) +PJ_DEF(void) pj_cis_buf_init( pj_cis_buf_t *cis_buf) { - PJ_CHECK_STACK(); - memset(cs, 0, sizeof(cs)); + pj_memset(cis_buf->cis_buf, 0, sizeof(cis_buf->cis_buf));
+ cis_buf->use_mask = 0; } - -PJ_DEF(void) pj_cs_set( pj_char_spec cs, int c) +
+/*
+ * Attach a specification to the buffer and give it the first bit position
+ * that no other specification in that buffer is using.
+ *
+ * Returns PJ_SUCCESS when a position was reserved, or PJ_ETOOMANY when all
+ * PJ_CIS_MAX_INDEX positions are already taken.
+ */
+PJ_DEF(pj_status_t) pj_cis_init(pj_cis_buf_t *cis_buf, pj_cis_t *cis)
+{
+    unsigned i;
+
+    cis->cis_buf = cis_buf->cis_buf;
+
+    for (i=0; i<PJ_CIS_MAX_INDEX; ++i) {
+        /* Shift an element-typed 1 so that testing the topmost bit does
+         * not overflow a signed int.
+         */
+        const pj_cis_elem_t bit = (pj_cis_elem_t)1 << i;
+
+        if ((cis_buf->use_mask & bit) == 0) {
+            /* Reserve the position, otherwise the next specification
+             * would be handed the very same bit.
+             */
+            cis_buf->use_mask |= bit;
+            cis->cis_id = (int)i;
+            return PJ_SUCCESS;
+        }
+    }
+
+    /* No free position: leave an out-of-range id in the specification. */
+    cis->cis_id = (int)PJ_CIS_MAX_INDEX;
+    return PJ_ETOOMANY;
+}
+
+/*
+ * Create a new specification in the same buffer as an existing one and
+ * copy the existing specification's membership into it.
+ *
+ * Returns the status of pj_cis_init() for the new specification.
+ */
+PJ_DEF(pj_status_t) pj_cis_dup( pj_cis_t *new_cis, pj_cis_t *existing)
+{
+    pj_status_t status;
+    unsigned i;
+
+    /* A pj_cis_t only keeps a pointer to the element array. That array is
+     * the first member of pj_cis_buf_t, so (assuming 'existing' was set up
+     * by pj_cis_init()) converting the pointer back yields the buffer.
+     */
+    status = pj_cis_init((pj_cis_buf_t*)existing->cis_buf, new_cis);
+    if (status != PJ_SUCCESS)
+        return status;
+
+    /* Mirror the membership of every possible character value. */
+    for (i=0; i<256; ++i) {
+        if (PJ_CIS_ISSET(existing, i))
+            PJ_CIS_SET(new_cis, i);
+        else
+            PJ_CIS_CLR(new_cis, i);
+    }
+
+    return PJ_SUCCESS;
+}
+ +PJ_DEF(void) pj_cis_add_range(pj_cis_t *cis, int cstart, int cend) { - PJ_CHECK_STACK(); - cs[c] = 1; -} - -PJ_DEF(void) pj_cs_add_range( pj_char_spec cs, int cstart, int cend) -{ - PJ_CHECK_STACK(); - while (cstart != cend) - cs[cstart++] = 1; + while (cstart != cend) {
+ PJ_CIS_SET(cis, cstart); + ++cstart;
+ } } -PJ_DEF(void) pj_cs_add_alpha( pj_char_spec cs) +PJ_DEF(void) pj_cis_add_alpha(pj_cis_t *cis) { - pj_cs_add_range( cs, 'a', 'z'+1); - pj_cs_add_range( cs, 'A', 'Z'+1); + pj_cis_add_range( cis, 'a', 'z'+1); + pj_cis_add_range( cis, 'A', 'Z'+1); } -PJ_DEF(void) pj_cs_add_num( pj_char_spec cs) +PJ_DEF(void) pj_cis_add_num(pj_cis_t *cis) { - pj_cs_add_range( cs, '0', '9'+1); + pj_cis_add_range( cis, '0', '9'+1); } -PJ_DEF(void) pj_cs_add_str( pj_char_spec cs, const char *str) +PJ_DEF(void) pj_cis_add_str( pj_cis_t *cis, const char *str) { - PJ_CHECK_STACK(); while (*str) { - cs[(int)*str] = 1; + PJ_CIS_SET(cis, *str); ++str; } } -PJ_DEF(void) pj_cs_del_range( pj_char_spec cs, int cstart, int cend) +PJ_DEF(void) pj_cis_del_range( pj_cis_t *cis, int cstart, int cend) { - PJ_CHECK_STACK(); - while (cstart != cend) - cs[cstart++] = 0; + while (cstart != cend) { + PJ_CIS_CLR(cis, cstart);
+ cstart++;
+ } } -PJ_DEF(void) pj_cs_del_str( pj_char_spec cs, const char *str) +PJ_DEF(void) pj_cis_del_str( pj_cis_t *cis, const char *str) { - PJ_CHECK_STACK(); while (*str) { - cs[(int)*str] = 0; + PJ_CIS_CLR(cis, *str); ++str; } } -PJ_DEF(void) pj_cs_invert( pj_char_spec cs ) +PJ_DEF(void) pj_cis_invert( pj_cis_t *cis ) { unsigned i; - PJ_CHECK_STACK(); - for (i=0; i<sizeof(pj_char_spec)/sizeof(cs[0]); ++i) { - cs[i] = (pj_char_spec_element_t) !cs[i]; + for (i=0; i<256; ++i) { + if (PJ_CIS_ISSET(cis,i))
+ PJ_CIS_CLR(cis,i);
+ else
+ PJ_CIS_SET(cis,i); } } @@ -165,7 +198,7 @@ PJ_DEF(void) pj_scan_skip_whitespace( pj_scanner *scanner ) } PJ_DEF(int) pj_scan_peek( pj_scanner *scanner, - const pj_char_spec spec, pj_str_t *out) + const pj_cis_t *spec, pj_str_t *out) { register char *s = scanner->curptr; register char *end = scanner->end; @@ -177,7 +210,7 @@ PJ_DEF(int) pj_scan_peek( pj_scanner *scanner, return -1; } - while (PJ_SCAN_CHECK_EOF(s) && pj_cs_match(spec, *s)) + while (PJ_SCAN_CHECK_EOF(s) && pj_cis_match(spec, *s)) ++s; pj_strset3(out, scanner->curptr, s); @@ -203,8 +236,8 @@ PJ_DEF(int) pj_scan_peek_n( pj_scanner *scanner, PJ_DEF(int) pj_scan_peek_until( pj_scanner *scanner, - const pj_char_spec spec, - pj_str_t *out) + const pj_cis_t *spec, + pj_str_t *out) { register char *s = scanner->curptr; register char *end = scanner->end; @@ -216,7 +249,7 @@ PJ_DEF(int) pj_scan_peek_until( pj_scanner *scanner, return -1; } - while (PJ_SCAN_CHECK_EOF(s) && !pj_cs_match( spec, *s)) + while (PJ_SCAN_CHECK_EOF(s) && !pj_cis_match( spec, *s)) ++s; pj_strset3(out, scanner->curptr, s); @@ -225,7 +258,7 @@ PJ_DEF(int) pj_scan_peek_until( pj_scanner *scanner, PJ_DEF(void) pj_scan_get( pj_scanner *scanner, - const pj_char_spec spec, pj_str_t *out) + const pj_cis_t *spec, pj_str_t *out) { register char *s = scanner->curptr; register char *end = scanner->end; @@ -233,14 +266,14 @@ PJ_DEF(void) pj_scan_get( pj_scanner *scanner, PJ_CHECK_STACK(); - if (pj_scan_is_eof(scanner) || !pj_cs_match(spec, *s)) { + if (pj_scan_is_eof(scanner) || !pj_cis_match(spec, *s)) { pj_scan_syntax_err(scanner); return; } do { ++s; - } while (PJ_SCAN_CHECK_EOF(s) && pj_cs_match(spec, *s)); + } while (PJ_SCAN_CHECK_EOF(s) && pj_cis_match(spec, *s)); pj_strset3(out, scanner->curptr, s); @@ -395,7 +428,7 @@ PJ_DEF(void) pj_scan_get_newline( pj_scanner *scanner ) PJ_DEF(void) pj_scan_get_until( pj_scanner *scanner, - const pj_char_spec spec, pj_str_t *out) + const pj_cis_t *spec, pj_str_t *out) { register char *s = 
scanner->curptr; register char *end = scanner->end; @@ -408,7 +441,7 @@ PJ_DEF(void) pj_scan_get_until( pj_scanner *scanner, return; } - while (PJ_SCAN_CHECK_EOF(s) && !pj_cs_match(spec, *s)) { + while (PJ_SCAN_CHECK_EOF(s) && !pj_cis_match(spec, *s)) { ++s; } @@ -424,7 +457,7 @@ PJ_DEF(void) pj_scan_get_until( pj_scanner *scanner, PJ_DEF(void) pj_scan_get_until_ch( pj_scanner *scanner, - int until_char, pj_str_t *out) + int until_char, pj_str_t *out) { register char *s = scanner->curptr; register char *end = scanner->end; |