BLI_string_utf8: add buffer size arg to BLI_str_utf8_from_unicode
Besides helping to avoid buffer overflow errors, this reduces the complexity of BLI_str_utf32_as_utf8 (and likewise BLI_strncpy_wchar_as_utf8), which needed a special loop for the last 6 bytes to avoid writing past the buffer bounds. Also add BLI_str_utf8_from_unicode_len, which only returns the length.
This commit is contained in:
@@ -653,10 +653,10 @@ static Object *find_family_object(
|
||||
return *ob_pt;
|
||||
}
|
||||
|
||||
char ch_utf8[7];
|
||||
char ch_utf8[BLI_UTF8_MAX + 1];
|
||||
size_t ch_utf8_len;
|
||||
|
||||
ch_utf8_len = BLI_str_utf8_from_unicode(ch, ch_utf8);
|
||||
ch_utf8_len = BLI_str_utf8_from_unicode(ch, ch_utf8, sizeof(ch_utf8) - 1);
|
||||
ch_utf8[ch_utf8_len] = '\0';
|
||||
ch_utf8_len += 1; /* Compare with null terminator. */
|
||||
|
||||
|
||||
@@ -329,7 +329,8 @@ int txt_extended_ascii_as_utf8(char **str)
|
||||
|
||||
memcpy(newstr + mi, (*str) + i, bad_char);
|
||||
|
||||
BLI_str_utf8_from_unicode((*str)[i + bad_char], newstr + mi + bad_char);
|
||||
const int mofs = mi + bad_char;
|
||||
BLI_str_utf8_from_unicode((*str)[i + bad_char], newstr + mofs, (length + added) - mofs);
|
||||
i += bad_char + 1;
|
||||
mi += bad_char + 2;
|
||||
}
|
||||
@@ -2005,7 +2006,7 @@ static bool txt_add_char_intern(Text *text, unsigned int add, bool replace_tabs)
|
||||
|
||||
txt_delete_sel(text);
|
||||
|
||||
add_len = BLI_str_utf8_from_unicode(add, ch);
|
||||
add_len = BLI_str_utf8_from_unicode(add, ch, sizeof(ch));
|
||||
|
||||
tmp = MEM_mallocN(text->curl->len + add_len + 1, "textline_string");
|
||||
|
||||
@@ -2061,7 +2062,7 @@ bool txt_replace_char(Text *text, unsigned int add)
|
||||
del = BLI_str_utf8_as_unicode_step(text->curl->line, text->curl->len, &del_size);
|
||||
del_size -= text->curc;
|
||||
UNUSED_VARS(del);
|
||||
add_size = BLI_str_utf8_from_unicode(add, ch);
|
||||
add_size = BLI_str_utf8_from_unicode(add, ch, sizeof(ch));
|
||||
|
||||
if (add_size > del_size) {
|
||||
char *tmp = MEM_mallocN(text->curl->len + add_size - del_size + 1, "textline_string");
|
||||
|
||||
@@ -48,7 +48,9 @@ unsigned int BLI_str_utf8_as_unicode_step_or_error(
|
||||
const char *__restrict p, size_t p_len, size_t *__restrict index) ATTR_WARN_UNUSED_RESULT
|
||||
ATTR_NONNULL(1, 3);
|
||||
|
||||
size_t BLI_str_utf8_from_unicode(unsigned int c, char *outbuf);
|
||||
size_t BLI_str_utf8_from_unicode_len(unsigned int c) ATTR_WARN_UNUSED_RESULT;
|
||||
size_t BLI_str_utf8_from_unicode(unsigned int c, char *outbuf, const size_t outbuf_len)
|
||||
ATTR_NONNULL(2);
|
||||
size_t BLI_str_utf8_as_utf32(char32_t *__restrict dst_w,
|
||||
const char *__restrict src_c,
|
||||
const size_t maxncpy) ATTR_NONNULL(1, 2);
|
||||
|
||||
@@ -296,36 +296,19 @@ size_t BLI_strncpy_wchar_as_utf8(char *__restrict dst,
|
||||
const wchar_t *__restrict src,
|
||||
const size_t maxncpy)
|
||||
{
|
||||
const size_t maxlen = maxncpy - 1;
|
||||
/* #BLI_UTF8_MAX is max utf8 length of an unicode char. */
|
||||
const int64_t maxlen_secured = (int64_t)maxlen - BLI_UTF8_MAX;
|
||||
size_t len = 0;
|
||||
|
||||
BLI_assert(maxncpy != 0);
|
||||
|
||||
size_t len = 0;
|
||||
#ifdef DEBUG_STRSIZE
|
||||
memset(dst, 0xff, sizeof(*dst) * maxncpy);
|
||||
#endif
|
||||
|
||||
while (*src && len <= maxlen_secured) {
|
||||
len += BLI_str_utf8_from_unicode((uint)*src++, dst + len);
|
||||
while (*src && len < maxncpy) {
|
||||
len += BLI_str_utf8_from_unicode((uint)*src++, dst + len, maxncpy - len);
|
||||
}
|
||||
|
||||
/* We have to be more careful for the last six bytes,
|
||||
* to avoid buffer overflow in case utf8-encoded char would be too long for our dst buffer. */
|
||||
while (*src) {
|
||||
char t[BLI_UTF8_MAX];
|
||||
size_t l = BLI_str_utf8_from_unicode((uint)*src++, t);
|
||||
BLI_assert(l <= BLI_UTF8_MAX);
|
||||
if (len + l > maxlen) {
|
||||
break;
|
||||
}
|
||||
memcpy(dst + len, t, l);
|
||||
len += l;
|
||||
}
|
||||
|
||||
dst[len] = '\0';
|
||||
|
||||
/* Return the correct length when part of the final byte did not fit into the string. */
|
||||
while ((len > 0) && UNLIKELY(dst[len - 1] == '\0')) {
|
||||
len--;
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
@@ -335,7 +318,7 @@ size_t BLI_wstrlen_utf8(const wchar_t *src)
|
||||
size_t len = 0;
|
||||
|
||||
while (*src) {
|
||||
len += BLI_str_utf8_from_unicode((uint)*src++, NULL);
|
||||
len += BLI_str_utf8_from_unicode_len((uint)*src++);
|
||||
}
|
||||
|
||||
return len;
|
||||
@@ -608,56 +591,78 @@ uint BLI_str_utf8_as_unicode_step(const char *__restrict p,
|
||||
}
|
||||
|
||||
/* was g_unichar_to_utf8 */
|
||||
/**
|
||||
* BLI_str_utf8_from_unicode:
|
||||
* \param c: a Unicode character code
|
||||
* \param outbuf: output buffer, must have at least 6 bytes of space.
|
||||
* If %NULL, the length will be computed and returned
|
||||
* and nothing will be written to outbuf.
|
||||
*
|
||||
* Converts a single character to UTF-8.
|
||||
*
|
||||
* \return number of bytes written
|
||||
*/
|
||||
size_t BLI_str_utf8_from_unicode(uint c, char *outbuf)
|
||||
|
||||
#define UTF8_VARS_FROM_CHAR32(Char, First, Len) \
|
||||
if (Char < 0x80) { \
|
||||
First = 0; \
|
||||
Len = 1; \
|
||||
} \
|
||||
else if (Char < 0x800) { \
|
||||
First = 0xc0; \
|
||||
Len = 2; \
|
||||
} \
|
||||
else if (Char < 0x10000) { \
|
||||
First = 0xe0; \
|
||||
Len = 3; \
|
||||
} \
|
||||
else if (Char < 0x200000) { \
|
||||
First = 0xf0; \
|
||||
Len = 4; \
|
||||
} \
|
||||
else if (Char < 0x4000000) { \
|
||||
First = 0xf8; \
|
||||
Len = 5; \
|
||||
} \
|
||||
else { \
|
||||
First = 0xfc; \
|
||||
Len = 6; \
|
||||
} \
|
||||
(void)0
|
||||
|
||||
size_t BLI_str_utf8_from_unicode_len(const uint c)
|
||||
{
|
||||
/* If this gets modified, also update the copy in g_string_insert_unichar() */
|
||||
uint len = 0;
|
||||
uint first;
|
||||
uint i;
|
||||
|
||||
if (c < 0x80) {
|
||||
first = 0;
|
||||
len = 1;
|
||||
}
|
||||
else if (c < 0x800) {
|
||||
first = 0xc0;
|
||||
len = 2;
|
||||
}
|
||||
else if (c < 0x10000) {
|
||||
first = 0xe0;
|
||||
len = 3;
|
||||
}
|
||||
else if (c < 0x200000) {
|
||||
first = 0xf0;
|
||||
len = 4;
|
||||
}
|
||||
else if (c < 0x4000000) {
|
||||
first = 0xf8;
|
||||
len = 5;
|
||||
}
|
||||
else {
|
||||
first = 0xfc;
|
||||
len = 6;
|
||||
UTF8_VARS_FROM_CHAR32(c, first, len);
|
||||
(void)first;
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
/**
|
||||
* BLI_str_utf8_from_unicode:
|
||||
*
|
||||
* \param c: a Unicode character code
|
||||
* \param outbuf: output buffer, must have at least `outbuf_len` bytes of space.
|
||||
* If the length required by `c` exceeds `outbuf_len`,
|
||||
* the available bytes will be zeroed and `outbuf_len` returned.
|
||||
*
|
||||
* Converts a single character to UTF-8.
|
||||
*
|
||||
* \return number of bytes written.
|
||||
*/
|
||||
size_t BLI_str_utf8_from_unicode(uint c, char *outbuf, const size_t outbuf_len)
|
||||
|
||||
{
|
||||
/* If this gets modified, also update the copy in g_string_insert_unichar() */
|
||||
uint len = 0;
|
||||
uint first;
|
||||
|
||||
UTF8_VARS_FROM_CHAR32(c, first, len);
|
||||
|
||||
if (UNLIKELY(outbuf_len < len)) {
|
||||
/* NULL terminate instead of writing a partial byte. */
|
||||
memset(outbuf, 0x0, outbuf_len);
|
||||
return outbuf_len;
|
||||
}
|
||||
|
||||
if (outbuf) {
|
||||
for (i = len - 1; i > 0; i--) {
|
||||
outbuf[i] = (c & 0x3f) | 0x80;
|
||||
c >>= 6;
|
||||
}
|
||||
outbuf[0] = c | first;
|
||||
for (uint i = len - 1; i > 0; i--) {
|
||||
outbuf[i] = (c & 0x3f) | 0x80;
|
||||
c >>= 6;
|
||||
}
|
||||
outbuf[0] = c | first;
|
||||
|
||||
return len;
|
||||
}
|
||||
@@ -701,36 +706,19 @@ size_t BLI_str_utf32_as_utf8(char *__restrict dst,
|
||||
const char32_t *__restrict src,
|
||||
const size_t maxncpy)
|
||||
{
|
||||
const size_t maxlen = maxncpy - 1;
|
||||
/* #BLI_UTF8_MAX is max utf8 length of an unicode char. */
|
||||
const int64_t maxlen_secured = (int64_t)maxlen - BLI_UTF8_MAX;
|
||||
size_t len = 0;
|
||||
|
||||
BLI_assert(maxncpy != 0);
|
||||
|
||||
size_t len = 0;
|
||||
#ifdef DEBUG_STRSIZE
|
||||
memset(dst, 0xff, sizeof(*dst) * maxncpy);
|
||||
#endif
|
||||
|
||||
while (*src && len <= maxlen_secured) {
|
||||
len += BLI_str_utf8_from_unicode((uint)*src++, dst + len);
|
||||
while (*src && len < maxncpy) {
|
||||
len += BLI_str_utf8_from_unicode((uint)*src++, dst + len, maxncpy - len);
|
||||
}
|
||||
|
||||
/* We have to be more careful for the last six bytes,
|
||||
* to avoid buffer overflow in case utf8-encoded char would be too long for our dst buffer. */
|
||||
while (*src) {
|
||||
char t[BLI_UTF8_MAX];
|
||||
size_t l = BLI_str_utf8_from_unicode((uint)*src++, t);
|
||||
BLI_assert(l <= BLI_UTF8_MAX);
|
||||
if (len + l > maxlen) {
|
||||
break;
|
||||
}
|
||||
memcpy(dst + len, t, l);
|
||||
len += l;
|
||||
}
|
||||
|
||||
dst[len] = '\0';
|
||||
|
||||
/* Return the correct length when part of the final byte did not fit into the string. */
|
||||
while ((len > 0) && UNLIKELY(dst[len - 1] == '\0')) {
|
||||
len--;
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
@@ -740,7 +728,7 @@ size_t BLI_str_utf32_as_utf8_len(const char32_t *src)
|
||||
size_t len = 0;
|
||||
|
||||
while (*src) {
|
||||
len += BLI_str_utf8_from_unicode((uint)*src++, NULL);
|
||||
len += BLI_str_utf8_from_unicode_len((uint)*src++);
|
||||
}
|
||||
|
||||
return len;
|
||||
|
||||
@@ -435,7 +435,7 @@ static int console_insert_invoke(bContext *C, wmOperator *op, const wmEvent *eve
|
||||
}
|
||||
else {
|
||||
/* in theory, ghost can set value to extended ascii here */
|
||||
len = BLI_str_utf8_from_unicode(event->ascii, str);
|
||||
len = BLI_str_utf8_from_unicode(event->ascii, str, sizeof(str) - 1);
|
||||
}
|
||||
str[len] = '\0';
|
||||
RNA_string_set(op->ptr, "text", str);
|
||||
|
||||
@@ -3486,7 +3486,7 @@ static int text_insert_invoke(bContext *C, wmOperator *op, const wmEvent *event)
|
||||
}
|
||||
else {
|
||||
/* in theory, ghost can set value to extended ascii here */
|
||||
len = BLI_str_utf8_from_unicode(event->ascii, str);
|
||||
len = BLI_str_utf8_from_unicode(event->ascii, str, sizeof(str) - 1);
|
||||
}
|
||||
str[len] = '\0';
|
||||
RNA_string_set(op->ptr, "text", str);
|
||||
|
||||
Reference in New Issue
Block a user