From 9621c9bcbca70f61605606db8517a789f53f8600 Mon Sep 17 00:00:00 2001 From: Richard Mudgett Date: Tue, 11 Oct 2016 18:14:39 -0500 Subject: json: Add UTF-8 check call. Since the json library does not make the check function public we recreate/copy the function in our interface module. ASTERISK-26466 Reported by: Richard Mudgett Change-Id: I36d3d750b6f5f1a110bc69ea92b435ecdeeb2a99 --- include/asterisk/json.h | 35 ++++++++++++++ main/json.c | 121 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 156 insertions(+) diff --git a/include/asterisk/json.h b/include/asterisk/json.h index 28ebfbd51..cfd9a2997 100644 --- a/include/asterisk/json.h +++ b/include/asterisk/json.h @@ -216,6 +216,41 @@ const char *ast_json_typename(enum ast_json_type type); /*!@{*/ +/*! + * \brief Check the string of the given length for UTF-8 format. + * \since 13.12.0 + * + * \param str String to check. + * \param len Length in bytes of the string to check. + * + * \retval 0 if not UTF-8 encoded or str is NULL. + * \retval 1 if UTF-8 encoded. + */ +int ast_json_utf8_check_len(const char *str, size_t len); + +/*! + * \brief Check the nul terminated string for UTF-8 format. + * \since 13.12.0 + * + * \param str String to check. + * + * \retval 0 if not UTF-8 encoded or str is NULL. + * \retval 1 if UTF-8 encoded. + */ +int ast_json_utf8_check(const char *str); + +/*! + * \brief Check str for UTF-8 and replace it with an empty string if it fails the check. + * + * \param str Nul terminated string to check. + * + * \return str if ast_json_utf8_check() accepts it, otherwise an empty string. + * + * \note The convenience macro is normally used with ast_json_pack() + * or a function wrapper that calls ast_json_vpack(). + * + * \note str is evaluated twice when it passes the check, so it + * should not have side effects. + */ +#define AST_JSON_UTF8_VALIDATE(str) (ast_json_utf8_check(str) ? (str) : "") + +/*!@}*/ + +/*!@{*/ + /*! * \brief Get the JSON true value. 
/*
 * UTF-8 validation added to main/json.c between ast_json_typename()
 * and ast_json_true().
 *
 * Since the json library does not make the check function public we
 * recreate/copy the function in our interface module.
 */

/*!
 * \internal
 * \brief Classify the lead byte of a UTF-8 sequence.
 *
 * Ported from libjansson utf.c:utf8_check_first()
 *
 * \param byte First byte of the sequence.
 *
 * \return Total number of bytes (1 to 4) in the sequence byte starts.
 * \retval 0 if byte cannot start a sequence.
 */
static size_t json_utf8_check_first(char byte)
{
	const unsigned char lead = (unsigned char) byte;

	if (lead <= 0x7F) {
		/* Single byte (ASCII) */
		return 1;
	}
	if (lead <= 0xC1) {
		/*
		 * 0x80-0xBF: second, third or fourth byte of a multi-byte
		 * sequence, i.e. a "continuation byte".
		 * 0xC0-0xC1: overlong encoding of an ASCII byte.
		 */
		return 0;
	}
	if (lead <= 0xDF) {
		/* 2-byte sequence */
		return 2;
	}
	if (lead <= 0xEF) {
		/* 3-byte sequence */
		return 3;
	}
	if (lead <= 0xF4) {
		/* 4-byte sequence */
		return 4;
	}

	/*
	 * 0xF5 and up: restricted (start of 4-, 5- or 6-byte sequence)
	 * or invalid UTF-8
	 */
	return 0;
}

/*!
 * \internal
 * \brief Validate one complete multi-byte UTF-8 sequence.
 *
 * Ported from libjansson utf.c:utf8_check_full()
 *
 * \param str Start of the sequence.  All len bytes are read.
 * \param len Sequence length; anything other than 2, 3 or 4 is rejected.
 *
 * \retval 0 if the sequence is malformed.
 * \retval 1 if the sequence is well formed.
 */
static size_t json_utf8_check_full(const char *str, size_t len)
{
	const unsigned char lead = (unsigned char) str[0];
	int32_t codepoint;
	int32_t smallest;	/* Lowest code point that needs len bytes. */
	size_t idx;

	switch (len) {
	case 2:
		codepoint = lead & 0x1F;
		smallest = 0x80;
		break;
	case 3:
		codepoint = lead & 0xF;
		smallest = 0x800;
		break;
	case 4:
		codepoint = lead & 0x7;
		smallest = 0x10000;
		break;
	default:
		return 0;
	}

	for (idx = 1; idx < len; ++idx) {
		const unsigned char cont = (unsigned char) str[idx];

		if ((cont & 0xC0) != 0x80) {
			/* not a continuation byte */
			return 0;
		}

		/* The shift leaves the low six bits clear for the payload. */
		codepoint = (codepoint << 6) | (cont & 0x3F);
	}

	if (codepoint > 0x10FFFF) {
		/* not in Unicode range */
		return 0;
	}
	if (codepoint >= 0xD800 && codepoint <= 0xDFFF) {
		/* invalid code point (UTF-16 surrogate halves) */
		return 0;
	}
	if (codepoint < smallest) {
		/* overlong encoding */
		return 0;
	}

	return 1;
}

/*!
 * \brief Check the string of the given length for UTF-8 format.
 *
 * Walks the buffer one sequence at a time.  A debug message is logged
 * when a non-NULL string fails the check.
 *
 * Loop ported from libjansson utf.c:utf8_check_string()
 *
 * \param str String to check.
 * \param len Length of string to check.
 *
 * \retval 0 if not UTF-8 encoded or str is NULL.
 * \retval 1 if UTF-8 encoded.
 */
int ast_json_utf8_check_len(const char *str, size_t len)
{
	size_t offset = 0;

	if (str == NULL) {
		return 0;
	}

	while (offset < len) {
		const size_t seq_len = json_utf8_check_first(str[offset]);

		/*
		 * Reject a bad lead byte, a sequence that needs more bytes
		 * than we have left in the string, or a malformed sequence.
		 */
		if (seq_len == 0
			|| (seq_len > 1
				&& (seq_len > len - offset
					|| !json_utf8_check_full(str + offset, seq_len)))) {
			ast_debug(1, "String '%.*s' is not UTF-8 for json conversion\n", (int) len, str);
			return 0;
		}

		offset += seq_len;
	}

	return 1;
}

/*!
 * \brief Check the nul terminated string for UTF-8 format.
 *
 * \param str String to check.
 *
 * \retval 0 if not UTF-8 encoded or str is NULL.
 * \retval 1 if UTF-8 encoded.
 */
int ast_json_utf8_check(const char *str)
{
	if (!str) {
		return 0;
	}

	return ast_json_utf8_check_len(str, strlen(str));
}