BLI_string_utf8: remove unnecessary UTF-8 decoding functions

Remove BLI_str_utf8_as_unicode_and_size and
BLI_str_utf8_as_unicode_and_size_safe.

Use BLI_str_utf8_as_unicode_step instead, since it takes
a buffer-bounds argument that prevents reading past the end of the buffer.
This commit is contained in:
2021-08-25 15:19:00 +10:00
parent be906f44c6
commit 38630711a0
7 changed files with 51 additions and 70 deletions

View File

@@ -71,12 +71,12 @@ int damerau_levenshtein_distance(StringRef a, StringRef b)
for (const int i : IndexRange(size_a)) {
v2[0] = (i + 1) * deletion_cost;
const uint32_t unicode_a = BLI_str_utf8_as_unicode_and_size(a.data() + offset_a, &offset_a);
const uint32_t unicode_a = BLI_str_utf8_as_unicode_step(a.data(), a.size(), &offset_a);
uint32_t prev_unicode_b;
size_t offset_b = 0;
for (const int j : IndexRange(size_b)) {
const uint32_t unicode_b = BLI_str_utf8_as_unicode_and_size(b.data() + offset_b, &offset_b);
const uint32_t unicode_b = BLI_str_utf8_as_unicode_step(b.data(), b.size(), &offset_b);
/* Check how costly the different operations would be and pick the cheapest - the one with
* minimal cost. */
@@ -202,8 +202,8 @@ static bool match_word_initials(StringRef query,
int first_found_word_index = -1;
while (query_index < query.size()) {
const uint query_unicode = BLI_str_utf8_as_unicode_and_size(query.data() + query_index,
&query_index);
const uint query_unicode = BLI_str_utf8_as_unicode_step(
query.data(), query.size(), &query_index);
while (true) {
/* We are at the end of words, no complete match has been found yet. */
if (word_index >= words.size()) {
@@ -226,8 +226,8 @@ static bool match_word_initials(StringRef query,
StringRef word = words[word_index];
/* Try to match the current character with the current word. */
if (static_cast<int>(char_index) < word.size()) {
const uint32_t char_unicode = BLI_str_utf8_as_unicode_and_size(word.data() + char_index,
&char_index);
const uint32_t char_unicode = BLI_str_utf8_as_unicode_step(
word.data(), word.size(), &char_index);
if (query_unicode == char_unicode) {
r_word_is_matched[word_index] = true;
if (first_found_word_index == -1) {
@@ -368,8 +368,9 @@ void extract_normalized_words(StringRef str,
size_t word_start = 0;
size_t offset = 0;
while (offset < str_size_in_bytes) {
size_t size = 0;
uint32_t unicode = BLI_str_utf8_as_unicode_and_size(str.data() + offset, &size);
size_t size = offset;
uint32_t unicode = BLI_str_utf8_as_unicode_step(str.data(), str.size(), &size);
size -= offset;
if (is_separator(unicode)) {
if (is_in_word) {
r_words.append(