2011-09-15 08:07:42 +00:00
|
|
|
/*
|
|
|
|
* ***** BEGIN GPL LICENSE BLOCK *****
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU General Public License
|
|
|
|
* as published by the Free Software Foundation; either version 2
|
|
|
|
* of the License, or (at your option) any later version.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with this program; if not, write to the Free Software Foundation,
|
|
|
|
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
|
|
*
|
|
|
|
* The Original Code is Copyright (C) 2011 Blender Foundation.
|
2011-10-21 01:46:03 +00:00
|
|
|
* Code from gutf8.c Copyright (C) 1999 Tom Tromey
|
|
|
|
* Copyright (C) 2000 Red Hat, Inc.
|
2011-09-15 08:07:42 +00:00
|
|
|
* All rights reserved.
|
|
|
|
*
|
|
|
|
* Contributor(s): Campbell Barton.
|
|
|
|
*
|
|
|
|
* ***** END GPL LICENSE BLOCK *****
|
2011-09-15 16:37:36 +00:00
|
|
|
*
|
2011-09-15 08:07:42 +00:00
|
|
|
*/
|
2011-09-15 16:37:36 +00:00
|
|
|
|
2011-09-15 08:07:42 +00:00
|
|
|
/** \file blender/blenlib/intern/string_utf8.c
|
2012-03-03 20:19:11 +00:00
|
|
|
* \ingroup bli
|
|
|
|
*/
|
2011-09-15 16:37:36 +00:00
|
|
|
|
2011-09-15 08:07:42 +00:00
|
|
|
#include <string.h>
|
2011-10-20 09:47:05 +00:00
|
|
|
#include <wchar.h>
|
|
|
|
#include <wctype.h>
|
2012-10-31 04:28:49 +00:00
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
2011-09-15 08:07:42 +00:00
|
|
|
|
2012-10-31 04:28:49 +00:00
|
|
|
#include "BLI_utildefines.h"
|
|
|
|
|
|
|
|
#include "BLI_string_utf8.h" /* own include */
|
2011-09-16 08:20:21 +00:00
|
|
|
|
2011-09-15 08:07:42 +00:00
|
|
|
/* from libswish3, originally called u8_isvalid(),
|
|
|
|
* modified to return the index of the bad character (byte index not utf).
|
|
|
|
* http://svn.swish-e.org/libswish3/trunk/src/libswish3/utf8.c r3044 - campbell */
|
|
|
|
|
|
|
|
/* based on the valid_utf8 routine from the PCRE library by Philip Hazel
|
2012-03-03 20:19:11 +00:00
|
|
|
*
|
|
|
|
* length is in bytes, since without knowing whether the string is valid
|
|
|
|
* it's hard to know how many characters there are! */
|
2011-09-15 08:07:42 +00:00
|
|
|
|
|
|
|
/* Number of continuation bytes that must follow each possible lead byte.
 * Entries for 0x00-0xbf are 0 (ASCII and stray continuation bytes),
 * 0xfe/0xff are listed as 5 and rejected explicitly by the validator. */
static const char trailingBytesForUTF8[256] = {
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5
};

/**
 * Scan \a length bytes of \a str for the first malformed UTF-8 sequence.
 *
 * \param str the bytes to validate.
 * \param length number of bytes to examine (bytes, not characters).
 * \return the byte index of the first byte of the offending sequence
 * (the lead byte when a continuation byte is wrong or missing),
 * or -1 when all \a length bytes are well formed.
 */
int BLI_utf8_invalid_byte(const char *str, int length)
{
	const unsigned char *p, *pend = (const unsigned char *)str + length;
	/* always points at the first byte of the sequence being checked,
	 * so the reported offset does not depend on how far 'p' has advanced */
	const unsigned char *perr = (const unsigned char *)str;
	unsigned char c;
	int ab;

	for (p = (const unsigned char *)str; p < pend; p++) {
		c = *p;
		perr = p;

		if (c < 128)
			continue;
		/* a continuation byte (10xx xxxx) can not start a sequence */
		if ((c & 0xc0) != 0xc0)
			goto utf8_error;

		ab = trailingBytesForUTF8[c];
		/* the sequence must fit in the bytes that remain, never read past 'pend' */
		if ((pend - p) <= ab)
			goto utf8_error;

		p++;
		/* Check top bits in the second byte */
		if ((*p & 0xc0) != 0x80)
			goto utf8_error;

		/* Check for overlong sequences for each different length */
		switch (ab) {
			/* Check for xx00 000x */
			case 1:
				if ((c & 0x3e) == 0) goto utf8_error;
				continue; /* We know there aren't any more bytes to check */

			/* Check for 1110 0000, xx0x xxxx */
			case 2:
				if (c == 0xe0 && (*p & 0x20) == 0) goto utf8_error;
				break;

			/* Check for 1111 0000, xx00 xxxx */
			case 3:
				if (c == 0xf0 && (*p & 0x30) == 0) goto utf8_error;
				break;

			/* Check for 1111 1000, xx00 0xxx */
			case 4:
				if (c == 0xf8 && (*p & 0x38) == 0) goto utf8_error;
				break;

			/* Check for leading 0xfe or 0xff,
			 * and then for 1111 1100, xx00 00xx */
			case 5:
				if (c == 0xfe || c == 0xff ||
				    (c == 0xfc && (*p & 0x3c) == 0)) goto utf8_error;
				break;
		}

		/* Check for valid bytes after the 2nd, if any; all must start 10 */
		while (--ab > 0) {
			if ((*(p + 1) & 0xc0) != 0x80) goto utf8_error;
			p++;
		}
	}

	return -1;

utf8_error:

	return (int)(perr - (const unsigned char *)str);
}
|
|
|
|
|
|
|
|
/**
 * Remove every byte reported by #BLI_utf8_invalid_byte from \a str, in place.
 *
 * \param str the buffer to clean, modified in place.
 * \param length number of bytes in \a str before the terminator
 * (the byte at str[length] is moved along with the tail, so it is
 * expected to be the '\0' terminator).
 * \return the number of bytes that were removed.
 */
int BLI_utf8_invalid_strip(char *str, int length)
{
	int bad_char, tot = 0;

	while ((bad_char = BLI_utf8_invalid_byte(str, length)) != -1) {
		str += bad_char;
		/* bytes that remain *after* the bad one; the bad byte itself is dropped,
		 * otherwise every following scan would look one byte further past the end */
		length -= bad_char + 1;

		if (length <= 0) {
			/* last character bad, strip it */
			*str = '\0';
			tot++;
			break;
		}
		else {
			/* strip, keep looking (+1 so the terminator moves too) */
			memmove(str, str + 1, (size_t)length + 1);
			tot++;
		}
	}

	return tot;
}
|
2011-09-15 11:49:36 +00:00
|
|
|
|
|
|
|
|
|
|
|
/* compatible with BLI_strncpy, but ensure no partial utf8 chars are copied */
|
|
|
|
|
2011-09-15 16:15:24 +00:00
|
|
|
/* array copied from glib's gutf8.c,
|
2011-09-15 11:49:36 +00:00
|
|
|
* note: this looks to be at odds with 'trailingBytesForUTF8',
|
|
|
|
* need to find out what gives here! - campbell */
|
|
|
|
/* Total length in bytes of the sequence introduced by each possible first byte.
 * Stray continuation bytes (0x80-0xbf) and 0xfe/0xff are given a length of 1
 * so they are stepped over one byte at a time. */
static const size_t utf8_skip_data[256] = {
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1
};

/* Copy whole UTF-8 sequences from 'src' to 'dst' while each one, plus the
 * terminator, still fits in 'maxncpy' bytes, then terminate 'dst'.
 *
 * All three arguments must be modifiable lvalues: 'dst' and 'src' are left
 * pointing just past the copied data and 'maxncpy' holds the space left over.
 *
 * The table index is cast to 'unsigned char': 'src' is a plain 'char' pointer
 * and bytes >= 0x80 would otherwise index the table with a negative value
 * where 'char' is signed. */
#define BLI_STR_UTF8_CPY(dst, src, maxncpy) \
	{ \
		size_t utf8_size; \
		while (*src != '\0' && \
		       (utf8_size = utf8_skip_data[(unsigned char)*src]) < maxncpy) \
		{ \
			maxncpy -= utf8_size; \
			switch (utf8_size) { \
				case 6: *dst ++ = *src ++; /* fall through */ \
				case 5: *dst ++ = *src ++; /* fall through */ \
				case 4: *dst ++ = *src ++; /* fall through */ \
				case 3: *dst ++ = *src ++; /* fall through */ \
				case 2: *dst ++ = *src ++; /* fall through */ \
				case 1: *dst ++ = *src ++; \
			} \
		} \
		*dst = '\0'; \
	} (void)0
|
2011-11-21 11:53:29 +00:00
|
|
|
|
2011-09-15 11:49:36 +00:00
|
|
|
/**
 * Copy \a src into \a dst, never splitting a multi-byte sequence.
 *
 * \param dst destination buffer.
 * \param src nul-terminated source string.
 * \param maxncpy size of \a dst in bytes, terminator included; must not be 0.
 * \return \a dst (the start of the destination buffer).
 */
char *BLI_strncpy_utf8(char *dst, const char *src, size_t maxncpy)
{
	/* the copy macro advances 'dst', keep hold of where the output starts */
	char *const dst_start = dst;

	BLI_assert(maxncpy != 0);

	/* note: invalid utf8 sequences in 'src' are neither detected nor repaired */
	BLI_STR_UTF8_CPY(dst, src, maxncpy);

	return dst_start;
}
|
|
|
|
|
2011-11-21 11:53:29 +00:00
|
|
|
/**
 * Append \a src to the string already in \a dst, never splitting a
 * multi-byte sequence.
 *
 * \param dst destination buffer holding the string to extend.
 * \param src nul-terminated string to append.
 * \param maxncpy byte budget, counted from the start of \a dst.
 * \return pointer to the terminator written at the end of the result:
 * \a dst has been advanced past the existing and the appended text by the
 * time it is returned, so this is NOT the start of the buffer.
 */
char *BLI_strncat_utf8(char *dst, const char *src, size_t maxncpy)
{
	/* walk to the end of the existing text, using up the budget as we go */
	for (; *dst && maxncpy > 0; dst++, maxncpy--) {
		/* pass */
	}

	BLI_STR_UTF8_CPY(dst, src, maxncpy);

	return dst;
}
|
|
|
|
|
|
|
|
#undef BLI_STR_UTF8_CPY
|
2011-10-20 09:47:05 +00:00
|
|
|
|
|
|
|
/* --------------------------------------------------------------------------*/
|
|
|
|
/* wchar_t / utf8 functions */
|
|
|
|
|
2012-10-31 04:24:55 +00:00
|
|
|
/**
 * Encode the wide string \a src as UTF-8 into \a dst.
 *
 * Only whole characters are written: conversion stops at the first
 * character whose encoding, plus the terminator, no longer fits.
 *
 * \param dst destination buffer.
 * \param src nul-terminated wide string.
 * \param maxncpy size of \a dst in bytes, terminator included; must not be 0.
 * \return number of bytes written, not counting the terminator.
 */
size_t BLI_strncpy_wchar_as_utf8(char *dst, const wchar_t *src, const size_t maxncpy)
{
	size_t len = 0;

	BLI_assert(maxncpy != 0);

	while (*src) {
		/* passing NULL only measures the encoding, nothing is written */
		const size_t char_len = BLI_str_utf8_from_unicode((unsigned int)*src, NULL);

		/* '>=' keeps one byte free for the terminator */
		if (len + char_len >= maxncpy) {
			break;
		}

		len += BLI_str_utf8_from_unicode((unsigned int)*src++, dst + len);
	}

	dst[len] = '\0';

	return len;
}
|
|
|
|
|
|
|
|
/* wchar len in utf8 */
|
|
|
|
/**
 * \param src nul-terminated wide string.
 * \return the number of bytes needed to store \a src as UTF-8,
 * not counting a terminator.
 */
size_t BLI_wstrlen_utf8(const wchar_t *src)
{
	size_t total = 0;
	const wchar_t *wc;

	for (wc = src; *wc; wc++) {
		/* NULL output: only the encoded size is computed */
		total += BLI_str_utf8_from_unicode(*wc, NULL);
	}

	return total;
}
|
|
|
|
|
2012-10-27 01:46:47 +00:00
|
|
|
/* this is very close to 'BLI_str_utf8_size' functionality, perhaps we should de-duplicate */
|
* New string property subtype: PASSWORD
When this new subtypes is used, then string of property is hidden using
asterisks, e.g.: mysecretpassword -> ****************
This code was reviewed and modified by Brecht. Thanks very much:
- https://codereview.appspot.com/6713044/
This new subtype of string property is intended mostly for Add-on developers
writing Add-on which communicates with some server (http, sql, ftp, verse,
etc.). When this server requires user authentication and user has to type
username and password, then current API didn't allow to type 'hidden' password,
e.g. when you want to demonstrate this script, then everybody can see this
security password. Some examples of Add-on which could use this new subtype:
- On-line database of textures
- Integration of render farm
- Integration of Verse
Security Notes:
- You can copy paste hiddent string of property from text input using (Ctrl-C, Ctrl-V),
but you can do this in other GUI toolkits too (this behavior it is widely used).
- Text of string property is stored in plain text, but it is widely used in other
GUI toolkits (Qt, Gtk, etc.).
Simple examples:
- https://dl.dropbox.com/u/369894/draw_op_passwd.py
- https://dl.dropbox.com/u/369894/blender-password.png
2012-10-26 12:58:54 +00:00
|
|
|
/* size of UTF-8 character in bytes */
|
2012-10-27 01:46:47 +00:00
|
|
|
/**
 * Size in bytes of the character starting at \a strc.
 *
 * Recognizes 2, 3 and 4 byte sequences whose trailing bytes are all
 * continuation bytes (10xx xxxx); anything else, including a sequence cut
 * short by the terminator, counts as a single byte.
 *
 * Trailing bytes are examined one at a time and the scan stops at the first
 * byte that is not a continuation byte, so nothing past the string
 * terminator is read.
 */
static size_t strlen_utf8_char(const char *strc)
{
	const unsigned char *s = (const unsigned char *)strc;
	size_t expected, i;

	if ((s[0] & 0xe0) == 0xc0) {
		expected = 2;
	}
	else if ((s[0] & 0xf0) == 0xe0) {
		expected = 3;
	}
	else if ((s[0] & 0xf8) == 0xf0) {
		expected = 4;
	}
	else {
		return 1;
	}

	for (i = 1; i < expected; i++) {
		if ((s[i] & 0xc0) != 0x80) {
			return 1;
		}
	}

	return expected;
}
|
|
|
|
|
2011-10-20 09:47:05 +00:00
|
|
|
/**
 * \param strc nul-terminated string.
 * \return the number of characters in \a strc (not bytes), bytes that do
 * not form a recognized multi-byte sequence count as one character each.
 */
size_t BLI_strlen_utf8(const char *strc)
{
	/* size_t to match the return type (and BLI_strnlen_utf8) */
	size_t len;

	for (len = 0; *strc; len++) {
		strc += strlen_utf8_char(strc);
	}

	return len;
}
|
|
|
|
|
2012-10-27 02:47:39 +00:00
|
|
|
/**
 * \param start the string to measure the length.
 * \param maxlen the string length (in bytes)
 * \return the unicode length (not in bytes!)
 *
 * \note counting stops at the terminator or once \a maxlen bytes have been
 * stepped over, whichever comes first.
 */
size_t BLI_strnlen_utf8(const char *start, const size_t maxlen)
{
	const char *strc = start;
	const char *strc_end = start + maxlen;

	size_t len;

	/* test the bound first so start[maxlen] is never dereferenced */
	for (len = 0; strc < strc_end && *strc; len++) {
		strc += strlen_utf8_char(strc);
	}

	return len;
}
|
|
|
|
|
2012-10-31 04:24:55 +00:00
|
|
|
size_t BLI_strncpy_wchar_from_utf8(wchar_t *dst_w, const char *src_c, const size_t maxncpy)
|
2011-10-20 09:47:05 +00:00
|
|
|
{
|
2012-08-11 22:12:32 +00:00
|
|
|
int len = 0;
|
2011-10-20 09:47:05 +00:00
|
|
|
|
2012-08-04 12:30:16 +00:00
|
|
|
if (dst_w == NULL || src_c == NULL) {
|
|
|
|
return 0;
|
|
|
|
}
|
2011-10-20 09:47:05 +00:00
|
|
|
|
2012-10-31 04:24:55 +00:00
|
|
|
while (*src_c && len < maxncpy) {
|
2012-10-23 03:38:26 +00:00
|
|
|
size_t step = 0;
|
|
|
|
unsigned int unicode = BLI_str_utf8_as_unicode_and_size(src_c, &step);
|
2011-10-21 00:48:02 +00:00
|
|
|
if (unicode != BLI_UTF8_ERR) {
|
2012-10-23 03:38:26 +00:00
|
|
|
*dst_w = (wchar_t)unicode;
|
2011-10-21 00:01:22 +00:00
|
|
|
src_c += step;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
*dst_w = '?';
|
2012-10-23 03:38:26 +00:00
|
|
|
src_c = BLI_str_find_next_char_utf8(src_c, NULL);
|
2011-10-20 09:47:05 +00:00
|
|
|
}
|
|
|
|
dst_w++;
|
|
|
|
len++;
|
|
|
|
}
|
|
|
|
return len;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* end wchar_t / utf8 functions */
|
|
|
|
/* --------------------------------------------------------------------------*/
|
|
|
|
|
2012-11-09 03:36:38 +00:00
|
|
|
/* copied from glib's gutf8.c, added 'Err' arg */
|
2011-10-21 00:01:22 +00:00
|
|
|
|
|
|
|
/* note, glib uses unsigned int for unicode, best we do the same,
|
2012-03-18 07:38:51 +00:00
|
|
|
* though we don't typedef it - campbell */
|
2011-10-21 00:01:22 +00:00
|
|
|
|
2012-11-09 03:36:38 +00:00
|
|
|
/* Classify the first byte of a sequence.
 *
 * Char: the first byte, as an unsigned value (evaluated more than once).
 * Mask: receives the bit-mask selecting the payload bits of the first byte.
 * Len:  receives the total length of the sequence in bytes (1-6).
 * Err:  value stored in Len when Char can not start a sequence,
 *       Mask is left untouched in that case. */
#define UTF8_COMPUTE(Char, Mask, Len, Err) \
	if      ((Char) < 0x80)            { Mask = 0x7f; Len = 1; } /* 0xxx xxxx */ \
	else if (((Char) & 0xe0) == 0xc0)  { Mask = 0x1f; Len = 2; } /* 110x xxxx */ \
	else if (((Char) & 0xf0) == 0xe0)  { Mask = 0x0f; Len = 3; } /* 1110 xxxx */ \
	else if (((Char) & 0xf8) == 0xf0)  { Mask = 0x07; Len = 4; } /* 1111 0xxx */ \
	else if (((Char) & 0xfc) == 0xf8)  { Mask = 0x03; Len = 5; } /* 1111 10xx */ \
	else if (((Char) & 0xfe) == 0xfc)  { Mask = 0x01; Len = 6; } /* 1111 110x */ \
	else { \
		Len = Err;  /* -1 is the typical error value or 1 to skip */ \
	} (void)0
|
2011-10-20 09:47:05 +00:00
|
|
|
|
2011-10-21 01:33:06 +00:00
|
|
|
/* same as glib define but added an 'Err' arg */
|
|
|
|
/* Decode one sequence into a code point.
 *
 * Result: receives the decoded value, or Err when a trailing byte is not a
 *         continuation byte (10xx xxxx).
 * Chars:  pointer to the first byte of the sequence.
 * Count:  scratch index; on return it is the number of bytes accepted,
 *         i.e. the index of the offending byte on error, else Len.
 * Mask:   payload mask for the first byte, as set by UTF8_COMPUTE.
 * Len:    total length of the sequence, as set by UTF8_COMPUTE.
 *
 * Wrapped in do/while so the assignment and the loop always act as one
 * statement, also as the body of an unbraced 'if'. */
#define UTF8_GET(Result, Chars, Count, Mask, Len, Err) \
	do { \
		(Result) = (Chars)[0] & (Mask); \
		for ((Count) = 1; (Count) < (Len); ++(Count)) { \
			if (((Chars)[(Count)] & 0xc0) != 0x80) { \
				(Result) = (Err); \
				break; \
			} \
			(Result) <<= 6; \
			(Result) |= ((Chars)[(Count)] & 0x3f); \
		} \
	} while (0)
|
2011-10-20 09:47:05 +00:00
|
|
|
|
|
|
|
|
2011-10-23 13:52:51 +00:00
|
|
|
/* uses glib functions but not from glib */
|
|
|
|
/* gets the size of a single utf8 char */
|
|
|
|
/**
 * \param p pointer to the first byte of a character.
 * \return the length in bytes (1-6) announced by the first byte,
 * or -1 when that byte can not start a sequence.
 * Only the first byte is inspected.
 */
int BLI_str_utf8_size(const char *p)
{
	const unsigned char lead = (unsigned char) *p;

	if (lead < 0x80)           return 1;  /* 0xxx xxxx */
	if ((lead & 0xe0) == 0xc0) return 2;  /* 110x xxxx */
	if ((lead & 0xf0) == 0xe0) return 3;  /* 1110 xxxx */
	if ((lead & 0xf8) == 0xf0) return 4;  /* 1111 0xxx */
	if ((lead & 0xfc) == 0xf8) return 5;  /* 1111 10xx */
	if ((lead & 0xfe) == 0xfc) return 6;  /* 1111 110x */

	/* continuation byte, 0xfe or 0xff */
	return -1;
}
|
|
|
|
|
|
|
|
/* use when we want to skip errors */
|
|
|
|
/**
 * Same as #BLI_str_utf8_size but never fails.
 *
 * \param p pointer to the first byte of a character.
 * \return the length in bytes (1-6) announced by the first byte,
 * or 1 when that byte can not start a sequence, so callers can step over it.
 */
int BLI_str_utf8_size_safe(const char *p)
{
	const unsigned char lead = (unsigned char) *p;

	if (lead < 0x80)           return 1;  /* 0xxx xxxx */
	if ((lead & 0xe0) == 0xc0) return 2;  /* 110x xxxx */
	if ((lead & 0xf0) == 0xe0) return 3;  /* 1110 xxxx */
	if ((lead & 0xf8) == 0xf0) return 4;  /* 1111 0xxx */
	if ((lead & 0xfc) == 0xf8) return 5;  /* 1111 10xx */
	if ((lead & 0xfe) == 0xfc) return 6;  /* 1111 110x */

	/* invalid first byte: treat it as a one byte character */
	return 1;
}
|
|
|
|
|
2011-10-21 00:01:22 +00:00
|
|
|
/* was g_utf8_get_char */
|
|
|
|
/**
|
|
|
|
* BLI_str_utf8_as_unicode:
|
2012-02-27 20:27:19 +00:00
|
|
|
* @p a pointer to Unicode character encoded as UTF-8
|
2011-10-21 00:01:22 +00:00
|
|
|
*
|
|
|
|
* Converts a sequence of bytes encoded as UTF-8 to a Unicode character.
|
|
|
|
* If @p does not point to a valid UTF-8 encoded character, results are
|
|
|
|
* undefined. If you are not sure that the bytes are complete
|
|
|
|
* valid Unicode characters, you should use g_utf8_get_char_validated()
|
|
|
|
* instead.
|
|
|
|
*
|
|
|
|
* Return value: the resulting character
|
|
|
|
**/
|
|
|
|
unsigned int BLI_str_utf8_as_unicode(const char *p)
|
|
|
|
{
|
2012-03-24 07:36:32 +00:00
|
|
|
int i, mask = 0, len;
|
|
|
|
unsigned int result;
|
|
|
|
unsigned char c = (unsigned char) *p;
|
2011-10-21 00:01:22 +00:00
|
|
|
|
2012-11-09 03:36:38 +00:00
|
|
|
UTF8_COMPUTE (c, mask, len, -1);
|
2012-03-24 07:36:32 +00:00
|
|
|
if (len == -1)
|
|
|
|
return BLI_UTF8_ERR;
|
|
|
|
UTF8_GET (result, p, i, mask, len, BLI_UTF8_ERR);
|
2011-10-21 00:01:22 +00:00
|
|
|
|
2012-03-24 07:36:32 +00:00
|
|
|
return result;
|
2011-10-21 00:01:22 +00:00
|
|
|
}
|
|
|
|
|
2012-03-02 16:05:54 +00:00
|
|
|
/* variant that increments the length */
|
2011-10-21 00:01:22 +00:00
|
|
|
unsigned int BLI_str_utf8_as_unicode_and_size(const char *p, size_t *index)
|
|
|
|
{
|
|
|
|
int i, mask = 0, len;
|
|
|
|
unsigned int result;
|
|
|
|
unsigned char c = (unsigned char) *p;
|
|
|
|
|
2012-11-09 03:36:38 +00:00
|
|
|
UTF8_COMPUTE (c, mask, len, -1);
|
2011-10-21 00:01:22 +00:00
|
|
|
if (len == -1)
|
2011-10-21 00:48:02 +00:00
|
|
|
return BLI_UTF8_ERR;
|
2011-10-21 01:33:06 +00:00
|
|
|
UTF8_GET (result, p, i, mask, len, BLI_UTF8_ERR);
|
2011-10-21 00:01:22 +00:00
|
|
|
*index += len;
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2012-03-02 16:05:54 +00:00
|
|
|
/* another variant that steps over the index,
|
2011-10-21 01:33:06 +00:00
|
|
|
* note, currently this also falls back to latin1 for text drawing. */
|
2011-10-21 00:01:22 +00:00
|
|
|
unsigned int BLI_str_utf8_as_unicode_step(const char *p, size_t *index)
|
|
|
|
{
|
|
|
|
int i, mask = 0, len;
|
|
|
|
unsigned int result;
|
|
|
|
unsigned char c;
|
|
|
|
|
|
|
|
p += *index;
|
2012-10-23 03:38:26 +00:00
|
|
|
c = (unsigned char) *p;
|
2011-10-21 00:01:22 +00:00
|
|
|
|
2012-11-09 03:36:38 +00:00
|
|
|
UTF8_COMPUTE (c, mask, len, -1);
|
2011-10-21 00:01:22 +00:00
|
|
|
if (len == -1) {
|
|
|
|
/* when called with NULL end, result will never be NULL,
|
|
|
|
* checks for a NULL character */
|
2012-10-23 03:38:26 +00:00
|
|
|
char *p_next = BLI_str_find_next_char_utf8(p, NULL);
|
2011-10-21 00:01:22 +00:00
|
|
|
/* will never return the same pointer unless '\0',
|
|
|
|
* eternal loop is prevented */
|
|
|
|
*index += (size_t)(p_next - p);
|
2011-10-21 00:48:02 +00:00
|
|
|
return BLI_UTF8_ERR;
|
2011-10-21 00:01:22 +00:00
|
|
|
}
|
2011-10-21 01:33:06 +00:00
|
|
|
|
|
|
|
/* this is tricky since there are a few ways we can bail out of bad unicode
|
|
|
|
* values, 3 possible solutions. */
|
|
|
|
#if 0
|
|
|
|
UTF8_GET (result, p, i, mask, len, BLI_UTF8_ERR);
|
|
|
|
#elif 1
|
|
|
|
/* WARNING: this is NOT part of glib, or supported by similar functions.
|
|
|
|
* this is added for text drawing because some filepaths can have latin1
|
|
|
|
* characters */
|
|
|
|
UTF8_GET (result, p, i, mask, len, BLI_UTF8_ERR);
|
2012-03-24 06:18:31 +00:00
|
|
|
if (result == BLI_UTF8_ERR) {
|
2012-10-23 03:38:26 +00:00
|
|
|
len = 1;
|
|
|
|
result = *p;
|
2011-10-21 01:33:06 +00:00
|
|
|
}
|
|
|
|
/* end warning! */
|
|
|
|
#else
|
|
|
|
/* without a fallback like '?', text drawing will stop on this value */
|
|
|
|
UTF8_GET (result, p, i, mask, len, '?');
|
|
|
|
#endif
|
|
|
|
|
2011-10-21 00:01:22 +00:00
|
|
|
*index += len;
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* was g_unichar_to_utf8 */
|
|
|
|
/**
|
|
|
|
* BLI_str_utf8_from_unicode:
|
2012-02-27 20:27:19 +00:00
|
|
|
* @c a Unicode character code
|
2012-03-02 16:05:54 +00:00
|
|
|
* \param outbuf output buffer, must have at least 6 bytes of space.
|
2011-10-21 00:01:22 +00:00
|
|
|
* If %NULL, the length will be computed and returned
|
2012-02-27 20:27:19 +00:00
|
|
|
* and nothing will be written to outbuf.
|
2011-10-21 00:01:22 +00:00
|
|
|
*
|
|
|
|
* Converts a single character to UTF-8.
|
|
|
|
*
|
|
|
|
* Return value: number of bytes written
|
|
|
|
**/
|
|
|
|
size_t BLI_str_utf8_from_unicode(unsigned int c, char *outbuf)
{
	/* If this gets modified, also update the copy in g_string_insert_unichar() */
	unsigned int nbytes;  /* length of the encoding */
	int lead_bits;        /* length marker or'ed into the first byte */

	if      (c < 0x80)      { nbytes = 1; lead_bits = 0x00; }
	else if (c < 0x800)     { nbytes = 2; lead_bits = 0xc0; }
	else if (c < 0x10000)   { nbytes = 3; lead_bits = 0xe0; }
	else if (c < 0x200000)  { nbytes = 4; lead_bits = 0xf0; }
	else if (c < 0x4000000) { nbytes = 5; lead_bits = 0xf8; }
	else                    { nbytes = 6; lead_bits = 0xfc; }

	if (outbuf != NULL) {
		unsigned int pos = nbytes;

		/* fill the continuation bytes back to front, 6 payload bits each */
		while (--pos > 0) {
			outbuf[pos] = (c & 0x3f) | 0x80;
			c >>= 6;
		}
		/* what is left of 'c' fits beside the length marker */
		outbuf[0] = c | lead_bits;
	}

	return nbytes;
}
|
|
|
|
|
|
|
|
/* was g_utf8_find_prev_char */
|
2011-10-16 12:25:42 +00:00
|
|
|
/**
|
2011-10-21 00:01:22 +00:00
|
|
|
* BLI_str_find_prev_char_utf8:
|
2011-10-16 12:25:42 +00:00
|
|
|
* @str: pointer to the beginning of a UTF-8 encoded string
|
2012-02-27 20:27:19 +00:00
|
|
|
* @p pointer to some position within @str
|
2011-10-16 12:25:42 +00:00
|
|
|
*
|
|
|
|
* Given a position @p with a UTF-8 encoded string @str, find the start
|
2012-02-27 20:27:19 +00:00
|
|
|
* of the previous UTF-8 character starting before. @p Returns %NULL if no
|
|
|
|
* UTF-8 characters are present in @str before @p
|
2011-10-16 12:25:42 +00:00
|
|
|
*
|
|
|
|
* @p does not have to be at the beginning of a UTF-8 character. No check
|
|
|
|
* is made to see if the character found is actually valid other than
|
|
|
|
* it starts with an appropriate byte.
|
|
|
|
*
|
|
|
|
* Return value: a pointer to the found character or %NULL.
|
|
|
|
**/
|
|
|
|
char *BLI_str_find_prev_char_utf8(const char *str, const char *p)
{
	/* step back only while there is something before 'p',
	 * so a pointer before the start of 'str' is never formed */
	while (p > str) {
		--p;
		/* anything that is not a continuation byte (10xx xxxx) starts a character */
		if ((*p & 0xc0) != 0x80) {
			return (char *)p;
		}
	}
	return NULL;
}
|
|
|
|
|
2011-10-21 00:01:22 +00:00
|
|
|
/* was g_utf8_find_next_char */
|
2011-10-16 12:25:42 +00:00
|
|
|
/**
|
2011-10-21 00:01:22 +00:00
|
|
|
* BLI_str_find_next_char_utf8:
|
2012-02-27 20:27:19 +00:00
|
|
|
* @p a pointer to a position within a UTF-8 encoded string
|
|
|
|
* @end a pointer to the byte following the end of the string,
|
2011-10-16 12:25:42 +00:00
|
|
|
* or %NULL to indicate that the string is nul-terminated.
|
|
|
|
*
|
2012-02-27 20:27:19 +00:00
|
|
|
* Finds the start of the next UTF-8 character in the string after @p
|
2011-10-16 12:25:42 +00:00
|
|
|
*
|
|
|
|
* @p does not have to be at the beginning of a UTF-8 character. No check
|
|
|
|
* is made to see if the character found is actually valid other than
|
|
|
|
* it starts with an appropriate byte.
|
|
|
|
*
|
|
|
|
* Return value: a pointer to the found character or %NULL
|
|
|
|
**/
|
|
|
|
char *BLI_str_find_next_char_utf8(const char *p, const char *end)
{
	/* at the terminator there is nothing to step over */
	if (*p != '\0') {
		++p;
		if (end != NULL) {
			/* bounded: skip continuation bytes, but stop at 'end' */
			while (p < end && (*p & 0xc0) == 0x80) {
				++p;
			}
		}
		else {
			/* unbounded: the terminator is not a continuation byte and ends the scan */
			while ((*p & 0xc0) == 0x80) {
				++p;
			}
		}
	}

	return (p != end) ? (char *)p : NULL;
}
|
|
|
|
|
2011-10-21 00:01:22 +00:00
|
|
|
/* was g_utf8_prev_char */
|
2011-10-16 12:25:42 +00:00
|
|
|
/**
|
2011-10-21 00:01:22 +00:00
|
|
|
* BLI_str_prev_char_utf8:
|
2012-02-27 20:27:19 +00:00
|
|
|
* @p a pointer to a position within a UTF-8 encoded string
|
2011-10-16 12:25:42 +00:00
|
|
|
*
|
2012-02-27 20:27:19 +00:00
|
|
|
* Finds the previous UTF-8 character in the string before @p
|
2011-10-16 12:25:42 +00:00
|
|
|
*
|
|
|
|
* @p does not have to be at the beginning of a UTF-8 character. No check
|
|
|
|
* is made to see if the character found is actually valid other than
|
|
|
|
* it starts with an appropriate byte. If @p might be the first
|
|
|
|
* character of the string, you must use g_utf8_find_prev_char() instead.
|
|
|
|
*
|
|
|
|
* Return value: a pointer to the found character.
|
|
|
|
**/
|
|
|
|
char *BLI_str_prev_char_utf8(const char *p)
{
	/* step back at least once, then keep going over continuation bytes
	 * (10xx xxxx); there is no lower bound, the caller must guarantee a
	 * character start exists before 'p' */
	do {
		--p;
	} while ((*p & 0xc0) == 0x80);

	return (char *)p;
}
|
|
|
|
/* end glib copy */
|