about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- e-util/ChangeLog 26
-rw-r--r-- e-util/ename/e-name-western.c 270
-rw-r--r-- e-util/ename/test-ename-western.c 75
3 files changed, 239 insertions, 132 deletions
diff --git a/e-util/ChangeLog b/e-util/ChangeLog
index 10524d389c..4b024cedec 100644
--- a/e-util/ChangeLog
+++ b/e-util/ChangeLog
@@ -1,3 +1,29 @@
+2003-03-31 Chris Toshok <toshok@ximian.com>
+
+ * ename/test-ename-western.c (do_name): print "" if the value is
+ NULL.
+ (main): add a couple of utf8 test cases (that still use western
+ orderings).
+
+ * ename/e-name-western.c (e_name_western_str_count_words):
+ utf8-ize this.
+ (e_name_western_cleanup_string): same.
+ (e_name_western_get_words_at_idx): same.
+ (e_name_western_get_one_prefix_at_str): same.
+ (e_name_western_get_prefix_at_str): same.
+ (e_name_western_extract_first): same.
+ (e_name_western_extract_middle): same.
+ (e_name_western_extract_nickname): same.
+ (e_name_western_extract_last): same.
+ (e_name_western_get_preceding_word): same.
+ (e_name_western_get_suffix_at_str_end): same.
+ (e_name_western_detect_backwards): same.
+ (e_name_western_reorder_asshole): same.
+ (FINISH_CHECK_MIDDLE_NAME_FOR_CONJUNCTION): same.
+ (e_name_western_fixup): same.
+ (e_name_western_parse): validate the string and truncate it if
+ need be.
+
2003-03-27 JP Rosevear <jpr@ximian.com>
* e-account-list.c (e_account_list_find): guard against NULL keys
diff --git a/e-util/ename/e-name-western.c b/e-util/ename/e-name-western.c
index b7b2459762..33c9d53f6c 100644
--- a/e-util/ename/e-name-western.c
+++ b/e-util/ename/e-name-western.c
@@ -35,9 +35,9 @@ e_name_western_str_count_words (char *str)
word_count = 0;
- for (p = str; p != NULL; p = strchr (p, ' ')) {
+ for (p = str; p != NULL; p = g_utf8_strchr (p, -1, ' ')) {
word_count ++;
- p ++;
+ p = g_utf8_next_char (p);
}
return word_count;
@@ -54,21 +54,22 @@ e_name_western_cleanup_string (char **str)
/* skip any spaces and commas at the start of the string */
p = *str;
- while (isspace ((unsigned char)*p) || *p == ',')
- p ++;
+ while (g_unichar_isspace (g_utf8_get_char(p)) || *p == ',')
+ p = g_utf8_next_char (p);
/* make the copy we're going to return */
newstr = g_strdup (p);
if ( strlen(newstr) > 0) {
/* now search from the back, skipping over any spaces and commas */
- p = newstr + strlen (newstr) - 1;
- while (isspace ((unsigned char)*p) || *p == ',')
- p --;
+ p = newstr + strlen (newstr);
+ p = g_utf8_prev_char (p);
+ while (g_unichar_isspace (g_utf8_get_char(p)) || *p == ',')
+ p = g_utf8_prev_char (p);
/* advance p to after the character that caused us to exit the
previous loop, and end the string. */
- if ((! isspace ((unsigned char)*p)) && *p != ',')
- p ++;
+ if ((! g_unichar_isspace (g_utf8_get_char (p))) && *p != ',')
+ p = g_utf8_next_char (p);
*p = '\0';
}
@@ -79,35 +80,29 @@ e_name_western_cleanup_string (char **str)
static char *
e_name_western_get_words_at_idx (char *str, int idx, int num_words)
{
- char *words;
+ GString *words;
char *p;
int word_count;
- int words_len;
/*
* Walk to the end of the words.
*/
+ words = g_string_new ("");
word_count = 0;
p = str + idx;
while (word_count < num_words && *p != '\0') {
- while (! isspace ((unsigned char)*p) && *p != '\0')
- p ++;
+ while (! g_unichar_isspace (g_utf8_get_char (p)) && *p != '\0') {
+ words = g_string_append_unichar (words, g_utf8_get_char (p));
+ p = g_utf8_next_char (p);
+ }
- while (isspace ((unsigned char)*p) && *p != '\0')
- p ++;
+ while (g_unichar_isspace (g_utf8_get_char (p)) && *p != '\0')
+ p = g_utf8_next_char (p);
word_count ++;
}
- words_len = p - str - idx - 1;
-
- if (*p == '\0')
- words_len ++;
-
- words = g_malloc0 (1 + words_len);
- strncpy (words, str + idx, words_len);
-
- return words;
+ return g_string_free (words, FALSE);
}
/*
@@ -167,9 +162,9 @@ e_name_western_get_one_prefix_at_str (char *str)
*/
word = e_name_western_get_words_at_idx (str, 0, 1);
- if (strlen (word) > 2 &&
- isalpha ((unsigned char) word [0]) &&
- isalpha ((unsigned char) word [1]) &&
+ if (g_utf8_strlen (word, -1) > 2 &&
+ g_unichar_isalpha (g_utf8_get_char (word)) &&
+ g_unichar_isalpha (g_utf8_get_char (g_utf8_next_char (word))) &&
word [strlen (word) - 1] == '.')
return word;
@@ -194,8 +189,8 @@ e_name_western_get_prefix_at_str (char *str)
/* Check for a second prefix. */
p = str + strlen (pfx1);
- while (isspace ((unsigned char)*p) && *p != '\0')
- p ++;
+ while (g_unichar_isspace (g_utf8_get_char (p)) && *p != '\0')
+ p = g_utf8_next_char (p);
pfx2 = e_name_western_get_one_prefix_at_str (p);
@@ -258,8 +253,8 @@ e_name_western_extract_first (ENameWestern *name, ENameWesternIdxs *idxs)
/* Skip past white space. */
p = name->full + first_idx;
- while (isspace ((unsigned char)*p) && *p != '\0')
- p++;
+ while (g_unichar_isspace (g_utf8_get_char (p)) && *p != '\0')
+ p = g_utf8_next_char (p);
if (*p == '\0')
return;
@@ -295,7 +290,7 @@ static void
e_name_western_extract_middle (ENameWestern *name, ENameWesternIdxs *idxs)
{
char *word;
- int middle_idx;
+ char *middle;
/*
* Middle names can only exist if you have a first name.
@@ -303,42 +298,44 @@ e_name_western_extract_middle (ENameWestern *name, ENameWesternIdxs *idxs)
if (idxs->first_idx == -1)
return;
- middle_idx = idxs->first_idx + strlen (name->first) + 1;
+ middle = name->full + idxs->first_idx + strlen (name->first);
+ middle = g_utf8_next_char (middle);
- if (middle_idx > strlen (name->full))
+ if (*middle == '\0')
return;
/*
* Skip any white space, stopping at the first non-space character (or the terminating \0)
*/
- while (isspace ((unsigned char)name->full [middle_idx]) &&
- name->full [middle_idx] != '\0')
- middle_idx ++;
+ while (g_unichar_isspace (g_utf8_get_char (middle)) &&
+ *middle != '\0')
+ middle = g_utf8_next_char (middle);
- if (name->full [middle_idx] == '\0')
+ if (*middle == '\0')
return;
/*
* Skip past the nickname, if it's there.
*/
- if (name->full [middle_idx] == '\"') {
+ if (*middle == '\"') {
if (idxs->nick_idx == -1)
return;
- middle_idx = idxs->nick_idx + strlen (name->nick) + 1;
+ middle = name->full + idxs->nick_idx + strlen (name->nick);
+ middle = g_utf8_next_char (middle);
- while (isspace ((unsigned char)name->full [middle_idx]) &&
- name->full [middle_idx] != '\0')
- middle_idx ++;
+ while (g_unichar_isspace (g_utf8_get_char (middle)) &&
+ *middle != '\0')
+ middle = g_utf8_next_char (middle);
- if (name->full [middle_idx] == '\0')
+ if (*middle == '\0')
return;
}
/*
* Make sure this isn't the beginning of a complex last name.
*/
- word = e_name_western_get_words_at_idx (name->full, middle_idx, 1);
+ word = e_name_western_get_words_at_idx (name->full, middle - name->full, 1);
if (e_name_western_is_complex_last_beginning (word)) {
g_free (word);
return;
@@ -361,48 +358,52 @@ e_name_western_extract_middle (ENameWestern *name, ENameWesternIdxs *idxs)
return;
}
- idxs->middle_idx = middle_idx;
+ idxs->middle_idx = middle - name->full;
name->middle = word;
}
static void
e_name_western_extract_nickname (ENameWestern *name, ENameWesternIdxs *idxs)
{
- int idx;
+ char *nick;
int start_idx;
- char *str;
+ GString *str;
if (idxs->first_idx == -1)
return;
if (idxs->middle_idx > idxs->first_idx)
- idx = idxs->middle_idx + strlen (name->middle);
+ nick = name->full + idxs->middle_idx + strlen (name->middle);
else
- idx = idxs->first_idx + strlen (name->first);
+ nick = name->full + idxs->first_idx + strlen (name->first);
- while (name->full [idx] != '\"' && name->full [idx] != '\0')
- idx ++;
+ while (*nick != '\"' && *nick != '\0')
+ nick = g_utf8_next_char (nick);
- if (name->full [idx] != '\"')
+ if (*nick != '\"')
return;
- start_idx = idx;
+ start_idx = nick - name->full;
/*
* Advance to the next double quote.
*/
- idx ++;
-
- while (name->full [idx] != '\"' && name->full [idx] != '\0')
- idx ++;
+ str = g_string_new ("\"");
+ nick = g_utf8_next_char (nick);
+
+ while (*nick != '\"' && *nick != '\0') {
+ str = g_string_append_unichar (str, g_utf8_get_char (nick));
+ nick = g_utf8_next_char (nick);
+ }
- if (name->full [idx] == '\0')
+ if (*nick == '\0') {
+ g_string_free (str, TRUE);
return;
+ }
+ str = g_string_append (str, "\"");
- str = g_malloc0 (idx - start_idx + 2);
- strncpy (str, name->full + start_idx, idx - start_idx + 1);
+ name->nick = g_string_free (str, FALSE);
- name->nick = str;
idxs->nick_idx = start_idx;
}
@@ -435,6 +436,7 @@ e_name_western_extract_last (ENameWestern *name, ENameWesternIdxs *idxs)
{
char *word;
int idx = -1;
+ char *last;
idx = e_name_western_last_get_max_idx (name, idxs);
@@ -463,14 +465,16 @@ e_name_western_extract_last (ENameWestern *name, ENameWesternIdxs *idxs)
return;
}
+ last = name->full + idx;
+
/* Skip past the white space. */
- while (isspace ((unsigned char)name->full [idx]) && name->full [idx] != '\0')
- idx ++;
+ while (g_unichar_isspace (g_utf8_get_char (last)) && *last != '\0')
+ last = g_utf8_next_char (last);
- if (name->full [idx] == '\0')
+ if (*last == '\0')
return;
- word = e_name_western_get_words_at_idx (name->full, idx, 1);
+ word = e_name_western_get_words_at_idx (name->full, last - name->full, 1);
e_name_western_cleanup_string (& word);
if (e_name_western_word_is_suffix (word)) {
g_free (word);
@@ -485,8 +489,8 @@ e_name_western_extract_last (ENameWestern *name, ENameWesternIdxs *idxs)
* Amozorrutia" without dropping data and forcing the user
* to retype it.
*/
- name->last = g_strdup (name->full + idx);
- idxs->last_idx = idx;
+ name->last = g_strdup (last);
+ idxs->last_idx = last - name->full;
}
static char *
@@ -498,14 +502,14 @@ e_name_western_get_preceding_word (char *str, int idx)
p = str + idx;
- while (isspace ((unsigned char)*p) && p > str)
- p --;
+ while (g_unichar_isspace (g_utf8_get_char (p)) && p > str)
+ p = g_utf8_prev_char (p);
- while (! isspace ((unsigned char)*p) && p > str)
- p --;
+ while (! g_unichar_isspace (g_utf8_get_char (p)) && p > str)
+ p = g_utf8_prev_char (p);
- if (isspace ((unsigned char)*p))
- p ++;
+ if (g_unichar_isspace (g_utf8_get_char (p)))
+ p = g_utf8_next_char (p);
word_len = (str + idx) - p;
word = g_malloc0 (word_len + 1);
@@ -531,7 +535,8 @@ e_name_western_get_suffix_at_str_end (char *str)
char *word;
word = e_name_western_get_preceding_word (str, p - str);
- nextp = p - strlen (word) - 1;
+ nextp = p - strlen (word);
+ nextp = g_utf8_prev_char (nextp);
e_name_western_cleanup_string (& word);
@@ -561,7 +566,6 @@ e_name_western_get_suffix_at_str_end (char *str)
static void
e_name_western_extract_suffix (ENameWestern *name, ENameWesternIdxs *idxs)
{
-
name->suffix = e_name_western_get_suffix_at_str_end (name->full);
if (name->suffix == NULL)
@@ -576,7 +580,7 @@ e_name_western_detect_backwards (ENameWestern *name, ENameWesternIdxs *idxs)
char *comma;
char *word;
- comma = strchr (name->full, ',');
+ comma = g_utf8_strchr (name->full, -1, ',');
if (comma == NULL)
return FALSE;
@@ -629,14 +633,14 @@ e_name_western_reorder_asshole (ENameWestern *name, ENameWesternIdxs *idxs)
* Everything from the end of the prefix to the comma is the
* last name.
*/
- comma = strchr (name->full, ',');
+ comma = g_utf8_strchr (name->full, -1, ',');
if (comma == NULL)
return;
p = name->full + (prefix == NULL ? 0 : strlen (prefix));
- while (isspace ((unsigned char)*p) && *p != '\0')
- p ++;
+ while (g_unichar_isspace (g_utf8_get_char (p)) && *p != '\0')
+ p = g_utf8_next_char (p);
last = g_malloc0 (comma - p + 1);
strncpy (last, p, comma - p);
@@ -650,10 +654,10 @@ e_name_western_reorder_asshole (ENameWestern *name, ENameWesternIdxs *idxs)
* Firstmidnick is everything from the comma to the beginning
* of the suffix.
*/
- p = comma + 1;
+ p = g_utf8_next_char (comma);
- while (isspace ((unsigned char)*p) && *p != '\0')
- p ++;
+ while (g_unichar_isspace (g_utf8_get_char (p)) && *p != '\0')
+ p = g_utf8_next_char (p);
if (suffix != NULL) {
char *q;
@@ -661,14 +665,15 @@ e_name_western_reorder_asshole (ENameWestern *name, ENameWesternIdxs *idxs)
/*
* Point q at the beginning of the suffix.
*/
- q = name->full + strlen (name->full) - strlen (suffix) - 1;
+ q = name->full + strlen (name->full) - strlen (suffix);
+ q = g_utf8_prev_char (q);
/*
* Walk backwards until we hit the space which
* separates the suffix from firstmidnick.
*/
- while (! isspace ((unsigned char)*q) && q > comma)
- q --;
+ while (! g_unichar_isspace (g_utf8_get_char (q)) && q > comma)
+ q = g_utf8_prev_char (q);
if ((q - p + 1) > 0) {
firstmidnick = g_malloc0 (q - p + 1);
@@ -710,43 +715,49 @@ e_name_western_zap_nil (char **str, int *idx)
*str = NULL;
}
-#define FINISH_CHECK_MIDDLE_NAME_FOR_CONJUNCTION \
- char *last_start = NULL; \
- if (name->last) \
- last_start = strchr (name->last, ' '); \
- if (last_start) { \
- char *new_last, *new_first; \
- \
- new_last = g_strdup (last_start + 1); \
- *last_start = '\0'; \
- \
+#define FINISH_CHECK_MIDDLE_NAME_FOR_CONJUNCTION \
+ char *last_start = NULL; \
+ if (name->last) \
+ last_start = g_utf8_strchr (name->last, -1, ' '); \
+ if (last_start) { \
+ char *new_last, *new_first; \
+ \
+ new_last = g_strdup (g_utf8_next_char (last_start)); \
+ *last_start = '\0'; \
+ \
idxs->last_idx += (last_start - name->last) + 1; \
- \
- new_first = g_strdup_printf ("%s %s %s", name->first, name->middle, name->last); \
- \
- g_free (name->first); \
- g_free (name->middle); \
- g_free (name->last); \
- \
- name->first = new_first; \
- name->middle = NULL; \
- name->last = new_last; \
- \
- idxs->middle_idx = -1; \
- } else { \
- char *new_first; \
- \
- new_first = g_strdup_printf ("%s %s %s", name->first, name->middle, name->last); \
- \
- g_free (name->first); \
- g_free (name->middle); \
- g_free (name->last); \
- \
- name->first = new_first; \
- name->middle = NULL; \
- name->last = NULL; \
- idxs->middle_idx = -1; \
- idxs->last_idx = -1; \
+ \
+ new_first = g_strdup_printf ("%s %s %s", \
+ name->first, \
+ name->middle, \
+ name->last); \
+ \
+ g_free (name->first); \
+ g_free (name->middle); \
+ g_free (name->last); \
+ \
+ name->first = new_first; \
+ name->middle = NULL; \
+ name->last = new_last; \
+ \
+ idxs->middle_idx = -1; \
+ } else { \
+ char *new_first; \
+ \
+ new_first = g_strdup_printf ("%s %s %s", \
+ name->first, \
+ name->middle, \
+ name->last); \
+ \
+ g_free (name->first); \
+ g_free (name->middle); \
+ g_free (name->last); \
+ \
+ name->first = new_first; \
+ name->middle = NULL; \
+ name->last = NULL; \
+ idxs->middle_idx = -1; \
+ idxs->last_idx = -1; \
}
#define CHECK_MIDDLE_NAME_FOR_CONJUNCTION(conj) \
@@ -795,10 +806,11 @@ e_name_western_fixup (ENameWestern *name, ENameWesternIdxs *idxs)
char *newlast;
char *p;
- p = sfx - 1;
- while (isspace ((unsigned char)*p) && p > name->last)
- p --;
- p ++;
+ p = sfx;
+ p = g_utf8_prev_char (p);
+ while (g_unichar_isspace (g_utf8_get_char (p)) && p > name->last)
+ p = g_utf8_prev_char (p);
+ p = g_utf8_next_char (p);
newlast = g_malloc0 (p - name->last + 1);
strncpy (newlast, name->last, p - name->last);
@@ -890,6 +902,12 @@ e_name_western_parse (const char *full_name)
{
ENameWesternIdxs *idxs;
ENameWestern *wname;
+ char *end;
+
+ if (!g_utf8_validate (full_name, -1, (const char **)&end)) {
+ g_warning ("e_name_western_parse passed invalid UTF-8 sequence");
+ *end = '\0';
+ }
wname = g_new0 (ENameWestern, 1);
diff --git a/e-util/ename/test-ename-western.c b/e-util/ename/test-ename-western.c
index 09847b5b4b..7a0ab09857 100644
--- a/e-util/ename/test-ename-western.c
+++ b/e-util/ename/test-ename-western.c
@@ -4,6 +4,7 @@
#include <gtk/gtkmain.h>
#include <ename/e-name-western.h>
+
static void
do_name (char *n)
{
@@ -13,12 +14,12 @@ do_name (char *n)
printf ("Full Name: [%s]\n", n);
- printf ("Prefix: [%s]\n", wname->prefix);
- printf ("First: [%s]\n", wname->first);
- printf ("Middle: [%s]\n", wname->middle);
- printf ("Nick: [%s]\n", wname->nick);
- printf ("Last: [%s]\n", wname->last);
- printf ("Suffix: [%s]\n", wname->suffix);
+ printf ("Prefix: [%s]\n", wname->prefix ? wname->prefix : "");
+ printf ("First: [%s]\n", wname->first ? wname->first : "");
+ printf ("Middle: [%s]\n", wname->middle ? wname->middle : "");
+ printf ("Nick: [%s]\n", wname->nick ? wname->nick : "");
+ printf ("Last: [%s]\n", wname->last ? wname->last : "");
+ printf ("Suffix: [%s]\n", wname->suffix ? wname->suffix : "");
printf ("\n");
@@ -28,6 +29,7 @@ do_name (char *n)
int
main (int argc, char **argv)
{
+ GString *str;
if (argc == 2) {
while (! feof (stdin)) {
char s[256];
@@ -67,5 +69,66 @@ main (int argc, char **argv)
do_name ("Nick Glennie-Smith");
do_name ("Dr von Johnson, Albert Roderick Jr");
+ /* create a name of the form:
+
+ <Prefix> <First name> <Nickname> <Middle> <Last name> <Suffix>
+
+ composed almost entirely of multibyte utf8 sequences.
+ */
+ str = g_string_new ("Dr. ");
+
+ str = g_string_append_unichar (str, 0x5341);
+ str = g_string_append_unichar (str, 0x57CE);
+ str = g_string_append_unichar (str, 0x76EE);
+
+ str = g_string_append (str, " \"");
+ str = g_string_append_unichar (str, 0x5341);
+ str = g_string_append_unichar (str, 0x5341);
+ str = g_string_append (str, "\" ");
+
+ str = g_string_append_unichar (str, 0x5341);
+ str = g_string_append_unichar (str, 0x76EE);
+
+ str = g_string_append (str, " ");
+
+ str = g_string_append_unichar (str, 0x76EE);
+ str = g_string_append_unichar (str, 0x76EE);
+ str = g_string_append (str, ", Esquire");
+
+ do_name (str->str);
+
+ str = g_string_assign (str, "");
+
+ /* Now try a utf8 sequence of the form:
+
+ Prefix Last, First Middle Suffix
+ */
+
+ str = g_string_new ("Dr. ");
+
+ /* last */
+ str = g_string_append_unichar (str, 0x5341);
+ str = g_string_append_unichar (str, 0x57CE);
+ str = g_string_append_unichar (str, 0x76EE);
+
+ str = g_string_append (str, ", ");
+
+ /* first */
+ str = g_string_append_unichar (str, 0x5341);
+ str = g_string_append_unichar (str, 0x76EE);
+ str = g_string_append_unichar (str, 0x57CE);
+
+ str = g_string_append (str, " ");
+
+ /* middle */
+ str = g_string_append_unichar (str, 0x5341);
+ str = g_string_append_unichar (str, 0x76EE);
+ str = g_string_append_unichar (str, 0x76EE);
+ str = g_string_append_unichar (str, 0x76EE);
+
+ str = g_string_append (str, ", Esquire");
+
+ do_name (str->str);
+
return 0;
}