From 33e0d0e847cee22aaa127e3ae1e75c27cc41f5b4 Mon Sep 17 00:00:00 2001 From: Lauris Kaplinski Date: Wed, 3 Jan 2001 03:03:34 +0000 Subject: Added functions for encoding utf8 safely into libxml brokenness svn path=/trunk/; revision=7225 --- e-util/e-xml-utils.c | 1 + widgets/misc/e-unicode.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++ widgets/misc/e-unicode.h | 15 +++++++++ 3 files changed, 101 insertions(+) diff --git a/e-util/e-xml-utils.c b/e-util/e-xml-utils.c index db6be09ebc..0f5d7d7292 100644 --- a/e-util/e-xml-utils.c +++ b/e-util/e-xml-utils.c @@ -228,3 +228,4 @@ e_xml_get_translated_string_prop_by_name(const xmlNode *parent, const xmlChar *p } return ret_val; } + diff --git a/widgets/misc/e-unicode.c b/widgets/misc/e-unicode.c index d40cc7d88d..c4c612002a 100644 --- a/widgets/misc/e-unicode.c +++ b/widgets/misc/e-unicode.c @@ -9,7 +9,9 @@ */ #include +#include #include +#include #include #include #include @@ -415,6 +417,89 @@ e_utf8_gtk_clist_append (GtkCList *clist, gchar *text[]) return row; } +/* + * Translate \U+XXXX\ sequences to utf8 chars + */ + +gchar * +e_utf8_xml1_decode (const gchar *text) +{ + const guchar *c; + guchar *u, *d; + int len, s, e; + + g_return_val_if_fail (text != NULL, NULL); + + len = strlen (text); + /* len * 2 is absolute maximum */ + u = d = g_malloc (len * 2); + + c = text; + s = 0; + while (s < len) { + if ((s <= (len - 8)) && + (c[s ] == '\\') && + (c[s + 1] == 'U' ) && + (c[s + 2] == '+' ) && + isxdigit (c[s + 3]) && + isxdigit (c[s + 4]) && + isxdigit (c[s + 5]) && + isxdigit (c[s + 6]) && + (c[s + 7] == '\\')) { + /* Valid \U+XXXX\ sequence */ + unsigned int unival; + unival = strtol (c + s + 3, NULL, 16); + d += g_unichar_to_utf8 (unival, d); + s += 8; + } else if (c[s] > 127) { + /* fixme: We assume iso-8859-1 currently */ + d += g_unichar_to_utf8 (c[s], d); + s += 1; + } else { + *d++ = c[s++]; + } + } + *d++ = '\0'; + u = g_realloc (u, (d - u)); + + return u; +} + +gchar * +e_utf8_xml1_encode (const gchar *text) +{ + guchar *u, *d, *c; + int unival; + int len; + + g_return_val_if_fail (text != NULL, NULL); + + len = 0; + for (u = unicode_get_utf8 (text, &unival); u && unival; u = unicode_get_utf8 (u, &unival)) { + if ((unival >= 0x80) || (unival == '\\')) { + len += 8; + } else { + len += 1; + } + } + d = c = g_new (guchar, len + 1); + + for (u = unicode_get_utf8 (text, &unival); u && unival; u = unicode_get_utf8 (u, &unival)) { + if ((unival >= 0x80) || (unival == '\\')) { + *c++ = '\\'; + *c++ = 'U'; + *c++ = '+'; + c += sprintf (c, "%04x", unival); + *c++ = '\\'; + } else { + *c++ = unival; + } + } + *c = '\0'; + + return d; +} + /** * g_unichar_to_utf8: * @c: a ISO10646 character code diff --git a/widgets/misc/e-unicode.h b/widgets/misc/e-unicode.h index 7734661bef..1b51595f6c 100644 --- a/widgets/misc/e-unicode.h +++ b/widgets/misc/e-unicode.h @@ -1,6 +1,16 @@ #ifndef _E_UNICODE_H_ #define _E_UNICODE_H_ +/* + * UTF-8 support functions for gal + * + * Authors: + * Lauris Kaplinski + * + * Copyright (C) 2000-2001 Helix Code, Inc. + * + */ + #include #include #include @@ -15,6 +25,8 @@ BEGIN_GNOME_DECLS #define gnome_font_lookup_default gnome_font_get_glyph #endif +#define G_UTF8_IN_GAL + void e_unicode_init (void); /* @@ -56,6 +68,9 @@ GtkWidget *e_utf8_gtk_menu_item_new_with_label (GtkMenu *menu, const gchar *labe void e_utf8_gtk_clist_set_text (GtkCList *clist, gint row, gint col, const gchar *text); gint e_utf8_gtk_clist_append (GtkCList *clist, gchar *text[]); +gchar * e_utf8_xml1_decode (const gchar *text); +gchar * e_utf8_xml1_encode (const gchar *text); + gint g_unichar_to_utf8 (gint c, gchar *outbuf); guint32 gdk_keyval_to_unicode (guint keysym); -- cgit