BLI_string_utf8: remove unnecessary utf8 decoding functions

Remove BLI_str_utf8_as_unicode_and_size and BLI_str_utf8_as_unicode_and_size_safe. Use BLI_str_utf8_as_unicode_step instead, since it takes a buffer-bounds argument that prevents reading past the end of the buffer.
2021-08-25 15:19:00 +10:00
parent be906f44c6
commit 38630711a0
7 changed files with 51 additions and 70 deletions
--- a/source/blender/blenlib/intern/string_search.cc
+++ b/source/blender/blenlib/intern/string_search.cc
@@ -71,12 +71,12 @@ int damerau_levenshtein_distance(StringRef a, StringRef b)
  for (const int i : IndexRange(size_a)) {
    v2[0] = (i + 1) * deletion_cost;

-    const uint32_t unicode_a = BLI_str_utf8_as_unicode_and_size(a.data() + offset_a, &offset_a);
+    const uint32_t unicode_a = BLI_str_utf8_as_unicode_step(a.data(), a.size(), &offset_a);

    uint32_t prev_unicode_b;
    size_t offset_b = 0;
    for (const int j : IndexRange(size_b)) {
-      const uint32_t unicode_b = BLI_str_utf8_as_unicode_and_size(b.data() + offset_b, &offset_b);
+      const uint32_t unicode_b = BLI_str_utf8_as_unicode_step(b.data(), b.size(), &offset_b);

      /* Check how costly the different operations would be and pick the cheapest - the one with
       * minimal cost. */
@@ -202,8 +202,8 @@ static bool match_word_initials(StringRef query,
  int first_found_word_index = -1;

  while (query_index < query.size()) {
-    const uint query_unicode = BLI_str_utf8_as_unicode_and_size(query.data() + query_index,
-                                                                &query_index);
+    const uint query_unicode = BLI_str_utf8_as_unicode_step(
+        query.data(), query.size(), &query_index);
    while (true) {
      /* We are at the end of words, no complete match has been found yet. */
      if (word_index >= words.size()) {
@@ -226,8 +226,8 @@ static bool match_word_initials(StringRef query,
      StringRef word = words[word_index];
      /* Try to match the current character with the current word. */
      if (static_cast<int>(char_index) < word.size()) {
-        const uint32_t char_unicode = BLI_str_utf8_as_unicode_and_size(word.data() + char_index,
-                                                                       &char_index);
+        const uint32_t char_unicode = BLI_str_utf8_as_unicode_step(
+            word.data(), word.size(), &char_index);
        if (query_unicode == char_unicode) {
          r_word_is_matched[word_index] = true;
          if (first_found_word_index == -1) {
@@ -368,8 +368,9 @@ void extract_normalized_words(StringRef str,
  size_t word_start = 0;
  size_t offset = 0;
  while (offset < str_size_in_bytes) {
-    size_t size = 0;
-    uint32_t unicode = BLI_str_utf8_as_unicode_and_size(str.data() + offset, &size);
+    size_t size = offset;
+    uint32_t unicode = BLI_str_utf8_as_unicode_step(str.data(), str.size(), &size);
+    size -= offset;
    if (is_separator(unicode)) {
      if (is_in_word) {
        r_words.append(