aboutsummaryrefslogtreecommitdiffstats
path: root/widgets/misc/e-unicode.c
diff options
context:
space:
mode:
authorLauris Kaplinski <lauris@src.gnome.org>2001-01-03 11:03:34 +0800
committerLauris Kaplinski <lauris@src.gnome.org>2001-01-03 11:03:34 +0800
commit33e0d0e847cee22aaa127e3ae1e75c27cc41f5b4 (patch)
treefd13e80fef99053bf72afbd9618a3e3be77fabdc /widgets/misc/e-unicode.c
parent71bf4380a14b21d61b228255df2aeb014e25cdb4 (diff)
downloadgsoc2013-evolution-33e0d0e847cee22aaa127e3ae1e75c27cc41f5b4.tar.gz
gsoc2013-evolution-33e0d0e847cee22aaa127e3ae1e75c27cc41f5b4.tar.zst
gsoc2013-evolution-33e0d0e847cee22aaa127e3ae1e75c27cc41f5b4.zip
Added functions for encoding utf8 safely into libxml brokenness
svn path=/trunk/; revision=7225
Diffstat (limited to 'widgets/misc/e-unicode.c')
-rw-r--r--widgets/misc/e-unicode.c85
1 files changed, 85 insertions, 0 deletions
diff --git a/widgets/misc/e-unicode.c b/widgets/misc/e-unicode.c
index d40cc7d88d..c4c612002a 100644
--- a/widgets/misc/e-unicode.c
+++ b/widgets/misc/e-unicode.c
@@ -9,7 +9,9 @@
*/
#include <config.h>
+#include <ctype.h>
#include <string.h>
+#include <stdio.h>
#include <unicode.h>
#include <iconv.h>
#include <gdk/gdk.h>
@@ -415,6 +417,89 @@ e_utf8_gtk_clist_append (GtkCList *clist, gchar *text[])
return row;
}
+/*
+ * e_utf8_xml1_decode:
+ * @text: NUL-terminated input string, possibly containing \U+XXXX\ escapes
+ *
+ * Translates \U+XXXX\ sequences (XXXX being exactly four hex digits) to
+ * UTF-8 characters. Any other byte above 127 is assumed to be ISO-8859-1
+ * and is converted to UTF-8 as well; all remaining bytes are copied
+ * unchanged.
+ *
+ * Returns: a newly allocated, NUL-terminated string, or NULL if @text is
+ * NULL.
+ */
+
+gchar *
+e_utf8_xml1_decode (const gchar *text)
+{
+	const guchar *c;
+	guchar *u, *d;
+	int len, s;
+
+	g_return_val_if_fail (text != NULL, NULL);
+
+	len = strlen (text);
+	/*
+	 * Worst case is two output bytes per input byte (a lone byte > 127),
+	 * plus one byte for the terminating NUL. Without the extra byte the
+	 * terminator lands past the end of the buffer for such input, and
+	 * for the empty string.
+	 */
+	u = d = g_malloc (len * 2 + 1);
+
+	c = (const guchar *) text;
+	s = 0;
+	while (s < len) {
+		if ((s <= (len - 8)) &&
+		    (c[s    ] == '\\') &&
+		    (c[s + 1] == 'U' ) &&
+		    (c[s + 2] == '+' ) &&
+		    isxdigit (c[s + 3]) &&
+		    isxdigit (c[s + 4]) &&
+		    isxdigit (c[s + 5]) &&
+		    isxdigit (c[s + 6]) &&
+		    (c[s + 7] == '\\')) {
+			/* Valid \U+XXXX\ sequence */
+			unsigned int unival;
+			/* Parsing stops at the closing backslash at c[s + 7] */
+			unival = strtol ((const char *) (c + s + 3), NULL, 16);
+			d += g_unichar_to_utf8 (unival, d);
+			s += 8;
+		} else if (c[s] > 127) {
+			/* fixme: We assume iso-8859-1 currently */
+			d += g_unichar_to_utf8 (c[s], d);
+			s += 1;
+		} else {
+			*d++ = c[s++];
+		}
+	}
+	*d++ = '\0';
+	/* Shrink the buffer to the bytes actually written, NUL included */
+	u = g_realloc (u, (d - u));
+
+	return (gchar *) u;
+}
+
+/*
+ * Number of hex digits that "%04x" prints for @val: at least four, more
+ * when the value does not fit in four digits.
+ */
+static int
+e_utf8_xml1_hex_digits (unsigned int val)
+{
+	int digits = 1;
+
+	while (val > 0xf) {
+		val >>= 4;
+		digits++;
+	}
+
+	return (digits < 4) ? 4 : digits;
+}
+
+/*
+ * e_utf8_xml1_encode:
+ * @text: NUL-terminated UTF-8 string
+ *
+ * Produces a 7-bit string in which every character >= 0x80, and every
+ * backslash, is replaced by a \U+XXXX\ escape (lowercase hex, at least
+ * four digits). All other characters are copied unchanged. Encoding stops
+ * at the first position where unicode_get_utf8() returns NULL or yields
+ * character 0.
+ *
+ * NOTE(review): characters above 0xFFFF are written with more than four
+ * hex digits; e_utf8_xml1_decode() only recognizes exactly four, so such
+ * characters do not round-trip.
+ *
+ * Returns: a newly allocated, NUL-terminated string, or NULL if @text is
+ * NULL.
+ */
+gchar *
+e_utf8_xml1_encode (const gchar *text)
+{
+	guchar *u, *d, *c;
+	int unival;
+	int len;
+
+	g_return_val_if_fail (text != NULL, NULL);
+
+	/* First pass: compute the exact size of the encoded output */
+	len = 0;
+	for (u = unicode_get_utf8 (text, &unival); u && unival; u = unicode_get_utf8 (u, &unival)) {
+		if ((unival >= 0x80) || (unival == '\\')) {
+			/*
+			 * "\U+" and the closing "\" take four bytes; the digit
+			 * count must match what sprintf emits below, otherwise
+			 * values above 0xFFFF overrun the buffer.
+			 */
+			len += 4 + e_utf8_xml1_hex_digits ((unsigned int) unival);
+		} else {
+			len += 1;
+		}
+	}
+	d = c = g_new (guchar, len + 1);
+
+	/* Second pass: write the encoded output */
+	for (u = unicode_get_utf8 (text, &unival); u && unival; u = unicode_get_utf8 (u, &unival)) {
+		if ((unival >= 0x80) || (unival == '\\')) {
+			*c++ = '\\';
+			*c++ = 'U';
+			*c++ = '+';
+			/*
+			 * sprintf's own NUL lands on the slot reserved for the
+			 * closing backslash, which is written right after.
+			 */
+			c += sprintf ((char *) c, "%04x", (unsigned int) unival);
+			*c++ = '\\';
+		} else {
+			*c++ = unival;
+		}
+	}
+	*c = '\0';
+
+	return (gchar *) d;
+}
+
/**
* g_unichar_to_utf8:
* @c: a ISO10646 character code