diff options
author | Lauris Kaplinski <lauris@src.gnome.org> | 2001-01-03 11:03:34 +0800 |
---|---|---|
committer | Lauris Kaplinski <lauris@src.gnome.org> | 2001-01-03 11:03:34 +0800 |
commit | 33e0d0e847cee22aaa127e3ae1e75c27cc41f5b4 (patch) | |
tree | fd13e80fef99053bf72afbd9618a3e3be77fabdc /widgets/misc/e-unicode.c | |
parent | 71bf4380a14b21d61b228255df2aeb014e25cdb4 (diff) | |
download | gsoc2013-evolution-33e0d0e847cee22aaa127e3ae1e75c27cc41f5b4.tar.gz gsoc2013-evolution-33e0d0e847cee22aaa127e3ae1e75c27cc41f5b4.tar.zst gsoc2013-evolution-33e0d0e847cee22aaa127e3ae1e75c27cc41f5b4.zip |
Added functions for encoding utf8 safely into libxml brokenness
svn path=/trunk/; revision=7225
Diffstat (limited to 'widgets/misc/e-unicode.c')
-rw-r--r-- | widgets/misc/e-unicode.c | 85 |
1 files changed, 85 insertions, 0 deletions
diff --git a/widgets/misc/e-unicode.c b/widgets/misc/e-unicode.c index d40cc7d88d..c4c612002a 100644 --- a/widgets/misc/e-unicode.c +++ b/widgets/misc/e-unicode.c @@ -9,7 +9,9 @@ */ #include <config.h> +#include <ctype.h> #include <string.h> +#include <stdio.h> #include <unicode.h> #include <iconv.h> #include <gdk/gdk.h> @@ -415,6 +417,89 @@ e_utf8_gtk_clist_append (GtkCList *clist, gchar *text[]) return row; } +/* + * Translate \U+XXXX\ sequences to utf8 chars + */ + +gchar * +e_utf8_xml1_decode (const gchar *text) +{ + const guchar *c; + guchar *u, *d; + int len, s, e; + + g_return_val_if_fail (text != NULL, NULL); + + len = strlen (text); + /* len * 2 is absolute maximum */ + u = d = g_malloc (len * 2); + + c = text; + s = 0; + while (s < len) { + if ((s <= (len - 8)) && + (c[s ] == '\\') && + (c[s + 1] == 'U' ) && + (c[s + 2] == '+' ) && + isxdigit (c[s + 3]) && + isxdigit (c[s + 4]) && + isxdigit (c[s + 5]) && + isxdigit (c[s + 6]) && + (c[s + 7] == '\\')) { + /* Valid \U+XXXX\ sequence */ + unsigned int unival; + unival = strtol (c + s + 3, NULL, 16); + d += g_unichar_to_utf8 (unival, d); + s += 8; + } else if (c[s] > 127) { + /* fixme: We assume iso-8859-1 currently */ + d += g_unichar_to_utf8 (c[s], d); + s += 1; + } else { + *d++ = c[s++]; + } + } + *d++ = '\0'; + u = g_realloc (u, (d - u)); + + return u; +} + +gchar * +e_utf8_xml1_encode (const gchar *text) +{ + guchar *u, *d, *c; + int unival; + int len; + + g_return_val_if_fail (text != NULL, NULL); + + len = 0; + for (u = unicode_get_utf8 (text, &unival); u && unival; u = unicode_get_utf8 (u, &unival)) { + if ((unival >= 0x80) || (unival == '\\')) { + len += 8; + } else { + len += 1; + } + } + d = c = g_new (guchar, len + 1); + + for (u = unicode_get_utf8 (text, &unival); u && unival; u = unicode_get_utf8 (u, &unival)) { + if ((unival >= 0x80) || (unival == '\\')) { + *c++ = '\\'; + *c++ = 'U'; + *c++ = '+'; + c += sprintf (c, "%04x", unival); + *c++ = '\\'; + } else { + *c++ = unival; + } + } + *c = '\0'; + + return d; +} + /** * g_unichar_to_utf8: * @c: a ISO10646 character code |