forked from blender/blender
Sergey Sharybin
a12a8a71bb
The goal is to resolve the confusion around the phrase "All rights reserved" when licensing code under an open-source license. The phrase "All rights reserved" comes from a historical convention that required this phrase for the copyright protection to apply. This convention is no longer relevant. However, even though the phrase has no meaning in establishing the copyright, it has not lost meaning in terms of licensing. This change makes it so code under the Blender Foundation copyright does not use "all rights reserved". This is also how the GPL license itself states how to apply it to the source code: <one line to give the program's name and a brief idea of what it does.> Copyright (C) <year> <name of author> This program is free software ... This change does not change the copyright notice in cases where the copyright is dual (BF and an author), or belongs to just an author of the code. It also does not change copyright which is inherited from NaN Holding BV, as it needs some further investigation into the proper way to handle it.
302 lines
5.8 KiB
C
302 lines
5.8 KiB
C
/* SPDX-License-Identifier: GPL-2.0-or-later
|
|
* Copyright 2012 Blender Foundation */
|
|
|
|
/** \file
|
|
* \ingroup intern_utf_conv
|
|
*/
|
|
|
|
#include "utfconv.h"
|
|
|
|
/**
 * Count the bytes needed to hold \a string16 once converted to UTF-8.
 *
 * \param string16: NUL-terminated UTF-16 string, may be NULL.
 * \return The UTF-8 byte count plus one (room for the terminating NUL),
 * or 0 when \a string16 is NULL.
 */
size_t count_utf_8_from_16(const wchar_t *string16)
{
  size_t count = 0;
  wchar_t u = 0;

  if (!string16) {
    return 0;
  }

  /* Walk the string by pointer: an `int` index could overflow on very long input. */
  for (; (u = *string16); string16++) {
    if (u < 0x0080) {
      count += 1;
    }
    else if (u < 0x0800) {
      count += 2;
    }
    else if (u < 0xD800) {
      count += 3;
    }
    else if (u < 0xDC00) {
      /* High surrogate: the unit that follows is always consumed together with it. */
      u = *++string16;
      if (u == 0) {
        /* String ends in the middle of a pair. */
        break;
      }
      if (u >= 0xDC00 && u < 0xE000) {
        count += 4;
      }
      /* Otherwise the pair is malformed and contributes nothing. */
    }
    else if (u < 0xE000) {
      /* Unpaired low surrogate: illegal, contributes nothing. */
    }
    else {
      count += 3;
    }
  }

  /* One extra for the terminator. */
  return count + 1;
}
|
|
|
|
/**
 * Count the UTF-16 code units needed to hold \a string8 once converted.
 *
 * Bytes that cannot start a sequence are ignored. A multi-byte sequence interrupted by a
 * non-continuation byte is discarded together with the interrupting byte. Decoded values
 * that are zero, fall in the surrogate range or lie above 0x10FFFF are not counted.
 *
 * \param string8: NUL-terminated UTF-8 string, may be NULL.
 * \return The code unit count plus one (room for the terminating NUL),
 * or 0 when \a string8 is NULL.
 */
size_t count_utf_16_from_8(const char *string8)
{
  size_t units = 0;
  /* Continuation bytes still expected for the sequence being decoded. */
  int pending = 0;
  /* Code point accumulated so far. */
  unsigned int cp = 0;
  const char *p;

  if (string8 == NULL) {
    return 0;
  }

  for (p = string8; *p != '\0'; p++) {
    const unsigned int byte = (unsigned char)*p;

    if (pending == 0) {
      /* Expecting the first byte of a sequence. */
      if (byte < 0x80) {
        units++;
      }
      else if ((byte & 0xE0) == 0xC0) {
        pending = 1;
        cp = byte & 0x1F;
      }
      else if ((byte & 0xF0) == 0xE0) {
        pending = 2;
        cp = byte & 0x0F;
      }
      else if ((byte & 0xF8) == 0xF0) {
        pending = 3;
        cp = byte & 0x07;
      }
      /* Anything else cannot start a sequence and is skipped. */
      continue;
    }

    if ((byte & 0xC0) != 0x80) {
      /* Sequence broken: drop what was gathered, this byte included. */
      pending = 0;
      cp = 0;
      continue;
    }

    cp = (cp << 6) | (byte & 0x3F);
    if (--pending > 0) {
      continue;
    }

    /* Sequence complete: one unit for the BMP, a surrogate pair above it. */
    if (cp >= 0x10000) {
      if (cp < 0x110000) {
        units += 2;
      }
    }
    else if (cp != 0 && (cp < 0xD800 || cp >= 0xE000)) {
      units++;
    }
    cp = 0;
  }

  return units + 1;
}
|
|
|
|
int conv_utf_16_to_8(const wchar_t *in16, char *out8, size_t size8)
|
|
{
|
|
char *out8end = out8 + size8;
|
|
wchar_t u = 0;
|
|
int err = 0;
|
|
if (!size8 || !in16 || !out8) {
|
|
return UTF_ERROR_NULL_IN;
|
|
}
|
|
out8end--;
|
|
|
|
for (; out8 < out8end && (u = *in16); in16++, out8++) {
|
|
if (u < 0x0080) {
|
|
*out8 = u;
|
|
}
|
|
else if (u < 0x0800) {
|
|
if (out8 + 1 >= out8end) {
|
|
break;
|
|
}
|
|
*out8++ = (0x3 << 6) | (0x1F & (u >> 6));
|
|
*out8 = (0x1 << 7) | (0x3F & (u));
|
|
}
|
|
else if (u < 0xD800 || u >= 0xE000) {
|
|
if (out8 + 2 >= out8end) {
|
|
break;
|
|
}
|
|
*out8++ = (0x7 << 5) | (0xF & (u >> 12));
|
|
*out8++ = (0x1 << 7) | (0x3F & (u >> 6));
|
|
*out8 = (0x1 << 7) | (0x3F & (u));
|
|
}
|
|
else if (u < 0xDC00) {
|
|
wchar_t u2 = *++in16;
|
|
|
|
if (!u2) {
|
|
break;
|
|
}
|
|
if (u2 >= 0xDC00 && u2 < 0xE000) {
|
|
if (out8 + 3 >= out8end) {
|
|
break;
|
|
}
|
|
unsigned int uc = 0x10000 + (u2 - 0xDC00) + ((u - 0xD800) << 10);
|
|
|
|
*out8++ = (0xF << 4) | (0x7 & (uc >> 18));
|
|
*out8++ = (0x1 << 7) | (0x3F & (uc >> 12));
|
|
*out8++ = (0x1 << 7) | (0x3F & (uc >> 6));
|
|
*out8 = (0x1 << 7) | (0x3F & (uc));
|
|
}
|
|
else {
|
|
out8--;
|
|
err |= UTF_ERROR_ILLCHAR;
|
|
}
|
|
}
|
|
else if (u < 0xE000) {
|
|
out8--;
|
|
err |= UTF_ERROR_ILLCHAR;
|
|
}
|
|
}
|
|
|
|
*out8 = *out8end = 0;
|
|
|
|
if (*in16) {
|
|
err |= UTF_ERROR_SMALL;
|
|
}
|
|
|
|
return err;
|
|
}
|
|
|
|
int conv_utf_8_to_16(const char *in8, wchar_t *out16, size_t size16)
|
|
{
|
|
char u;
|
|
char type = 0;
|
|
unsigned int u32 = 0;
|
|
wchar_t *out16end = out16 + size16;
|
|
int err = 0;
|
|
if (!size16 || !in8 || !out16) {
|
|
return UTF_ERROR_NULL_IN;
|
|
}
|
|
out16end--;
|
|
|
|
for (; out16 < out16end && (u = *in8); in8++) {
|
|
if (type == 0) {
|
|
if ((u & 0x01 << 7) == 0) {
|
|
*out16 = u;
|
|
out16++;
|
|
u32 = 0;
|
|
continue;
|
|
} // 1 utf-8 char
|
|
if ((u & 0x07 << 5) == 0xC0) {
|
|
type = 1;
|
|
u32 = u & 0x1F;
|
|
continue;
|
|
} // 2 utf-8 char
|
|
if ((u & 0x0F << 4) == 0xE0) {
|
|
type = 2;
|
|
u32 = u & 0x0F;
|
|
continue;
|
|
} // 3 utf-8 char
|
|
if ((u & 0x1F << 3) == 0xF0) {
|
|
type = 3;
|
|
u32 = u & 0x07;
|
|
continue;
|
|
} // 4 utf-8 char
|
|
err |= UTF_ERROR_ILLCHAR;
|
|
continue;
|
|
}
|
|
if ((u & 0xC0) == 0x80) {
|
|
u32 = (u32 << 6) | (u & 0x3F);
|
|
type--;
|
|
}
|
|
else {
|
|
u32 = 0;
|
|
type = 0;
|
|
err |= UTF_ERROR_ILLSEQ;
|
|
}
|
|
|
|
if (type == 0) {
|
|
if ((0 < u32 && u32 < 0xD800) || (0xE000 <= u32 && u32 < 0x10000)) {
|
|
*out16 = u32;
|
|
out16++;
|
|
}
|
|
else if (0x10000 <= u32 && u32 < 0x110000) {
|
|
if (out16 + 1 >= out16end) {
|
|
break;
|
|
}
|
|
u32 -= 0x10000;
|
|
*out16 = 0xD800 + (u32 >> 10);
|
|
out16++;
|
|
*out16 = 0xDC00 + (u32 & 0x3FF);
|
|
out16++;
|
|
}
|
|
u32 = 0;
|
|
}
|
|
}
|
|
|
|
*out16 = *out16end = 0;
|
|
|
|
if (*in8) {
|
|
err |= UTF_ERROR_SMALL;
|
|
}
|
|
|
|
return err;
|
|
}
|
|
|
|
/* UNUSED FUNCTIONS */
|
|
#if 0
|
|
/**
 * Check whether a NUL-terminated string consists of 7-bit bytes only.
 *
 * \return 1 when no byte has its high bit set, 0 otherwise.
 */
static int is_ascii(const char *in8)
{
  const char *p = in8;

  while (*p != '\0') {
    if ((*p & 0x80) != 0) {
      return 0;
    }
    p++;
  }

  return 1;
}
|
|
|
|
/**
 * Unfinished stub: it only reads the byte at offset \a maxcutpoint and never modifies
 * \a inout8. Does nothing when \a inout8 is NULL.
 */
static void utf_8_cut_end(char *inout8, size_t maxcutpoint)
{
  char at_cut;

  if (inout8 == NULL) {
    return;
  }

  at_cut = inout8[maxcutpoint];
  (void)at_cut;
}
|
|
#endif
|
|
|
|
/**
 * Allocate a buffer with malloc() and fill it with the UTF-8 conversion of \a in16.
 *
 * \param in16: NUL-terminated UTF-16 string, may be NULL.
 * \param add: Number of extra bytes to allocate after the converted string.
 * \return The new buffer, which the caller releases with free(), or NULL when \a in16 is
 * NULL, the requested size overflows, or the allocation fails.
 */
char *alloc_utf_8_from_16(const wchar_t *in16, size_t add)
{
  const size_t bsize = count_utf_8_from_16(in16);
  size_t total;
  char *out8;

  if (!bsize) {
    return NULL;
  }

  /* Unsigned addition wraps on overflow; refuse rather than under-allocate. */
  total = bsize + add;
  if (total < bsize) {
    return NULL;
  }

  out8 = malloc(total);
  if (!out8) {
    return NULL;
  }

  /* Only the first `bsize` bytes are written; the `add` bytes are left to the caller. */
  conv_utf_16_to_8(in16, out8, bsize);
  return out8;
}
|
|
|
|
/**
 * Allocate a buffer with malloc() and fill it with the UTF-16 conversion of \a in8.
 *
 * \param in8: NUL-terminated UTF-8 string, may be NULL.
 * \param add: Number of extra `wchar_t` elements to allocate after the converted string.
 * \return The new buffer, which the caller releases with free(), or NULL when \a in8 is
 * NULL, the requested size overflows, or the allocation fails.
 */
wchar_t *alloc_utf16_from_8(const char *in8, size_t add)
{
  const size_t bsize = count_utf_16_from_8(in8);
  size_t total;
  wchar_t *out16;

  if (!bsize) {
    return NULL;
  }

  /* Guard both the addition and the multiplication by the element size against wrap-around. */
  total = bsize + add;
  if (total < bsize || total > (size_t)-1 / sizeof(*out16)) {
    return NULL;
  }

  out16 = malloc(sizeof(*out16) * total);
  if (!out16) {
    return NULL;
  }

  /* Only the first `bsize` elements are written; the `add` elements are left to the caller. */
  conv_utf_8_to_16(in8, out16, bsize);
  return out16;
}
|