diff options
author | Richard Mudgett <rmudgett@digium.com> | 2016-10-11 18:14:39 -0500 |
---|---|---|
committer | Richard Mudgett <rmudgett@digium.com> | 2016-10-13 18:11:36 -0500 |
commit | 9621c9bcbca70f61605606db8517a789f53f8600 (patch) | |
tree | abe9d0283acaf7d438642abe071cb0ed3e72f7b8 /main | |
parent | 3633c7926cdc102570a5f179aae8a9017bd0a034 (diff) |
json: Add UTF-8 check call.
Since the json library does not make the check function public we
recreate/copy the function in our interface module.
ASTERISK-26466
Reported by: Richard Mudgett
Change-Id: I36d3d750b6f5f1a110bc69ea92b435ecdeeb2a99
Diffstat (limited to 'main')
-rw-r--r-- | main/json.c | 121 |
1 files changed, 121 insertions, 0 deletions
diff --git a/main/json.c b/main/json.c
index 35e6f16ce..78a47cd58 100644
--- a/main/json.c
+++ b/main/json.c
@@ -269,6 +269,127 @@ const char *ast_json_typename(enum ast_json_type type)
 	return "?";
 }
 
+/*
+ * Ported from libjansson utf.c:utf8_check_first()
+ *
+ * Classify a byte as the first byte of a UTF-8 sequence.
+ *
+ * byte - Candidate first byte; it is examined as an unsigned char so
+ *        the result does not depend on the signedness of plain char.
+ *
+ * Returns the total number of bytes in the sequence that this byte
+ * starts: 1 for a byte below 0x80, or 2, 3 or 4 for a multi-byte
+ * lead byte.  Returns 0 when the byte cannot start a sequence.
+ */
+static size_t json_utf8_check_first(char byte)
+{
+	unsigned char ch = (unsigned char) byte;
+
+	if (ch < 0x80) {
+		return 1;
+	}
+
+	if (0x80 <= ch && ch <= 0xBF) {
+		/* second, third or fourth byte of a multi-byte
+		   sequence, i.e. a "continuation byte" */
+		return 0;
+	} else if (ch == 0xC0 || ch == 0xC1) {
+		/* overlong encoding of an ASCII byte */
+		return 0;
+	} else if (0xC2 <= ch && ch <= 0xDF) {
+		/* 2-byte sequence */
+		return 2;
+	} else if (0xE0 <= ch && ch <= 0xEF) {
+		/* 3-byte sequence */
+		return 3;
+	} else if (0xF0 <= ch && ch <= 0xF4) {
+		/* 4-byte sequence */
+		return 4;
+	} else { /* ch >= 0xF5 */
+		/* Restricted (start of 4-, 5- or 6-byte sequence) or invalid
+		   UTF-8 */
+		return 0;
+	}
+}
+
+/*
+ * Ported from libjansson utf.c:utf8_check_full()
+ *
+ * Validate one complete multi-byte UTF-8 sequence.
+ *
+ * str - Points at the lead byte of the sequence.  All len bytes are
+ *       read without any bounds check, so the caller must make sure
+ *       that many bytes are available.
+ * len - Sequence length; only 2, 3 and 4 are accepted, any other
+ *       value makes the function return 0.
+ *
+ * The code point is rebuilt from the lead byte and the continuation
+ * bytes and then rejected if it is above 0x10FFFF, falls in the
+ * 0xD800-0xDFFF surrogate range, or is small enough that a shorter
+ * sequence would have encoded it (overlong form).
+ *
+ * Returns 1 when the sequence is valid, 0 otherwise.
+ */
+static size_t json_utf8_check_full(const char *str, size_t len)
+{
+	size_t pos;
+	int32_t value;
+	unsigned char ch = (unsigned char) str[0];
+
+	if (len == 2) {
+		value = ch & 0x1F; /* keep the low 5 bits of the lead byte */
+	} else if (len == 3) {
+		value = ch & 0xF; /* keep the low 4 bits of the lead byte */
+	} else if (len == 4) {
+		value = ch & 0x7; /* keep the low 3 bits of the lead byte */
+	} else {
+		return 0;
+	}
+
+	for (pos = 1; pos < len; ++pos) {
+		ch = (unsigned char) str[pos];
+		if (ch < 0x80 || ch > 0xBF) {
+			/* not a continuation byte */
+			return 0;
+		}
+
+		value = (value << 6) + (ch & 0x3F); /* append the low 6 bits of this byte */
+	}
+
+	if (value > 0x10FFFF) {
+		/* not in Unicode range */
+		return 0;
+	} else if (0xD800 <= value && value <= 0xDFFF) {
+		/* invalid code point (UTF-16 surrogate halves) */
+		return 0;
+	} else if ((len == 2 && value < 0x80)
+		|| (len == 3 && value < 0x800)
+		|| (len == 4 && value < 0x10000)) {
+		/* overlong encoding */
+		return 0;
+	}
+
+	return 1;
+}
+/*
+ * Check that the first len bytes of str form valid UTF-8.
+ *
+ * str - Buffer to check; NULL is rejected.
+ * len - Number of bytes of str to examine.
+ *
+ * Bytes below 0x80 (a zero byte included) each count as a complete
+ * one-byte character.  A len of 0 with a non-NULL str is valid since
+ * there is nothing to reject.
+ *
+ * Returns 1 when all len bytes are valid UTF-8.  Returns 0 when str
+ * is NULL or an invalid or truncated sequence is found; in the latter
+ * case (but not for NULL) the string is also reported with
+ * ast_debug(1, ...).
+ *
+ * NOTE(review): len is narrowed to int for the precision argument of
+ * the debug format.  A len that does not fit in an int would not be
+ * passed through intact - confirm callers never supply one.
+ */
+int ast_json_utf8_check_len(const char *str, size_t len)
+{
+	size_t pos;
+	size_t count;
+	int res = 1;
+
+	if (!str) {
+		return 0;
+	}
+
+	/*
+	 * Since the json library does not make the check function
+	 * public we recreate/copy the function in our interface
+	 * module.
+	 *
+	 * Loop ported from libjansson utf.c:utf8_check_string()
+	 *
+	 * Each pass handles one whole character: count is the length
+	 * reported for the byte at pos, and pos then advances by that
+	 * many bytes.  A count of 0 leaves the loop before pos is
+	 * advanced, so the loop always makes progress.
+	 */
+	for (pos = 0; pos < len; pos += count) {
+		count = json_utf8_check_first(str[pos]);
+		if (count == 0) {
+			res = 0;
+			break;
+		} else if (count > 1) {
+			if (count > len - pos) {
+				/* UTF-8 needs more than we have left in the string. */
+				res = 0;
+				break;
+			}
+
+			if (!json_utf8_check_full(&str[pos], count)) {
+				res = 0;
+				break;
+			}
+		}
+	}
+
+	if (!res) {
+		ast_debug(1, "String '%.*s' is not UTF-8 for json conversion\n", (int) len, str);
+	}
+	return res;
+}
+/*
+ * Check that a NUL terminated string is valid UTF-8.
+ *
+ * str - String to check; NULL is rejected.
+ *
+ * Returns 0 when str is NULL, otherwise the result of
+ * ast_json_utf8_check_len() applied to the strlen(str) bytes of str.
+ * The NULL test comes first so strlen() is never handed a NULL
+ * pointer.
+ */
+int ast_json_utf8_check(const char *str)
+{
+	return str ? ast_json_utf8_check_len(str, strlen(str)) : 0;
+}
 struct ast_json *ast_json_true(void)
 { |