diff options
Diffstat (limited to 'addressbook/gui/merging/eab-contact-compare.c')
-rw-r--r-- | addressbook/gui/merging/eab-contact-compare.c | 736 |
1 files changed, 0 insertions, 736 deletions
diff --git a/addressbook/gui/merging/eab-contact-compare.c b/addressbook/gui/merging/eab-contact-compare.c deleted file mode 100644 index 409b1bce81..0000000000 --- a/addressbook/gui/merging/eab-contact-compare.c +++ /dev/null @@ -1,736 +0,0 @@ -/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ - -/* - * eab-contact-compare.c - * - * Copyright (C) 2001, 2002, 2003 Ximian, Inc. - * - * Authors: Jon Trowbridge <trow@ximian.com> - * Chris Toshok <toshok@ximian.com> - */ - -/* - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 - * USA. - */ - -#include <config.h> -#include <ctype.h> -#include <string.h> -#include "util/eab-book-util.h" -#include "eab-contact-compare.h" - -/* This is an "optimistic" combiner: the best of the two outcomes is - selected. */ -static EABContactMatchType -combine_comparisons (EABContactMatchType prev, - EABContactMatchType new_info) -{ - if (new_info == EAB_CONTACT_MATCH_NOT_APPLICABLE) - return prev; - return (EABContactMatchType) MAX ((gint) prev, (gint) new_info); -} - - -/*** Name comparisons ***/ - -/* This *so* doesn't belong here... at least not implemented in a - sucky way like this. But it can be fixed later. */ - -/* This is very Anglocentric. */ -static gchar *name_synonyms[][2] = { - { "jon", "john" }, /* Ah, the hacker's perogative */ - { "joseph", "joe" }, - { "robert", "bob" }, - { "gene", "jean" }, - { "jesse", "jessie" }, - { "ian", "iain" }, - { "richard", "dick" }, - { "william", "bill" }, - { "william", "will" }, - { "anthony", "tony" }, - { "michael", "mike" }, - { "eric", "erik" }, - { "elizabeth", "liz" }, - { "jeff", "geoff" }, - { "jeff", "geoffrey" }, - { "tom", "thomas" }, - { "dave", "david" }, - { "jim", "james" }, - { "abigal", "abby" }, - { "amanda", "amy" }, - { "amanda", "manda" }, - { "jennifer", "jenny" }, - { "christopher", "chris" }, - { "rebecca", "becca" }, - { "rebecca", "becky" }, - { "anderson", "andersen" }, - { "johnson", "johnsen" }, - /* We could go on and on... */ - /* We should add soundex here. */ - { NULL, NULL } -}; - -static gboolean -name_fragment_match (const gchar *a, const gchar *b, gboolean strict) -{ - gint len; - - if (!(a && b && *a && *b)) - return FALSE; - - /* If we are in 'strict' mode, b must match the beginning of a. - So "Robert", "Rob" would match, but "Robert", "Robbie" wouldn't. - - If strict is FALSE, it is sufficient for the strings to share - some leading characters. In this case, "Robert" and "Robbie" - would match, as would "Dave" and "Dan". */ - - if (strict) { - len = g_utf8_strlen (b, -1); - } else { - len = MIN (g_utf8_strlen (a, -1), g_utf8_strlen (b, -1)); - } - - return !e_utf8_casefold_collate_len (a, b, len); -} - -static gboolean -name_fragment_match_with_synonyms (const gchar *a, const gchar *b, gboolean strict) -{ - gint i; - - if (!(a && b && *a && *b)) - return FALSE; - - if (name_fragment_match (a, b, strict)) - return TRUE; - - /* Check for nicknames. Yes, the linear search blows. */ - for (i=0; name_synonyms[i][0]; ++i) { - - if (!e_utf8_casefold_collate (name_synonyms[i][0], a) - && !e_utf8_casefold_collate (name_synonyms[i][1], b)) - return TRUE; - - if (!e_utf8_casefold_collate (name_synonyms[i][0], b) - && !e_utf8_casefold_collate (name_synonyms[i][1], a)) - return TRUE; - } - - return FALSE; -} - -EABContactMatchType -eab_contact_compare_name_to_string (EContact *contact, const gchar *str) -{ - return eab_contact_compare_name_to_string_full (contact, str, FALSE, NULL, NULL, NULL); -} - -EABContactMatchType -eab_contact_compare_name_to_string_full (EContact *contact, const gchar *str, gboolean allow_partial_matches, - gint *matched_parts_out, EABContactMatchPart *first_matched_part_out, gint *matched_character_count_out) -{ - gchar **namev, **givenv = NULL, **addv = NULL, **familyv = NULL; - - gint matched_parts = EAB_CONTACT_MATCH_PART_NONE; - EABContactMatchPart first_matched_part = EAB_CONTACT_MATCH_PART_NONE; - EABContactMatchPart this_part_match = EAB_CONTACT_MATCH_PART_NOT_APPLICABLE; - EABContactMatchType match_type; - EContactName *contact_name; - - gint match_count = 0, matched_character_count = 0, fragment_count; - gint i, j; - gchar *str_cpy, *s; - - g_return_val_if_fail (E_IS_CONTACT (contact), EAB_CONTACT_MATCH_NOT_APPLICABLE); - - if (!e_contact_get_const (contact, E_CONTACT_FULL_NAME)) - return EAB_CONTACT_MATCH_NOT_APPLICABLE; - if (str == NULL) - return EAB_CONTACT_MATCH_NOT_APPLICABLE; - - str_cpy = s = g_strdup (str); - while (*s) { - if (*s == ',' || *s == '"') - *s = ' '; - ++s; - } - namev = g_strsplit (str_cpy, " ", 0); - g_free (str_cpy); - - contact_name = e_contact_get (contact, E_CONTACT_NAME); - - if (contact_name->given) - givenv = g_strsplit (contact_name->given, " ", 0); - if (contact_name->additional) - addv = g_strsplit (contact_name->additional, " ", 0); - if (contact_name->family) - familyv = g_strsplit (contact_name->family, " ", 0); - - e_contact_name_free (contact_name); - - fragment_count = 0; - for (i = 0; givenv && givenv[i]; ++i) - ++fragment_count; - for (i = 0; addv && addv[i]; ++i) - ++fragment_count; - for (i = 0; familyv && familyv[i]; ++i) - ++fragment_count; - - for (i = 0; namev[i] && this_part_match != EAB_CONTACT_MATCH_PART_NONE; ++i) { - - if (*namev[i]) { - - this_part_match = EAB_CONTACT_MATCH_PART_NONE; - - /* When we are allowing partials, we are strict about the matches we allow. - Does this make sense? Not really, but it does the right thing for the purposes - of completion. */ - - if (givenv && this_part_match == EAB_CONTACT_MATCH_PART_NONE) { - for (j = 0; givenv[j]; ++j) { - if (name_fragment_match_with_synonyms (givenv[j], namev[i], allow_partial_matches)) { - - this_part_match = EAB_CONTACT_MATCH_PART_GIVEN_NAME; - - /* We remove a piece of a name once it has been matched against, so - that "john john" won't match "john doe". */ - g_free (givenv[j]); - givenv[j] = g_strdup (""); - break; - } - } - } - - if (addv && this_part_match == EAB_CONTACT_MATCH_PART_NONE) { - for (j = 0; addv[j]; ++j) { - if (name_fragment_match_with_synonyms (addv[j], namev[i], allow_partial_matches)) { - - this_part_match = EAB_CONTACT_MATCH_PART_ADDITIONAL_NAME; - - g_free (addv[j]); - addv[j] = g_strdup (""); - break; - } - } - } - - if (familyv && this_part_match == EAB_CONTACT_MATCH_PART_NONE) { - for (j = 0; familyv[j]; ++j) { - if (allow_partial_matches ? name_fragment_match_with_synonyms (familyv[j], namev[i], allow_partial_matches) - : !e_utf8_casefold_collate (familyv[j], namev[i])) { - - this_part_match = EAB_CONTACT_MATCH_PART_FAMILY_NAME; - - g_free (familyv[j]); - familyv[j] = g_strdup (""); - break; - } - } - } - - if (this_part_match != EAB_CONTACT_MATCH_PART_NONE) { - ++match_count; - matched_character_count += g_utf8_strlen (namev[i], -1); - matched_parts |= this_part_match; - if (first_matched_part == EAB_CONTACT_MATCH_PART_NONE) - first_matched_part = this_part_match; - } - } - } - - match_type = EAB_CONTACT_MATCH_NONE; - - if (this_part_match != EAB_CONTACT_MATCH_PART_NONE) { - - if (match_count > 0) - match_type = EAB_CONTACT_MATCH_VAGUE; - - if (fragment_count == match_count) { - - match_type = EAB_CONTACT_MATCH_EXACT; - - } else if (fragment_count == match_count + 1) { - - match_type = EAB_CONTACT_MATCH_PARTIAL; - - } - } - - if (matched_parts_out) - *matched_parts_out = matched_parts; - if (first_matched_part_out) - *first_matched_part_out = first_matched_part; - if (matched_character_count_out) - *matched_character_count_out = matched_character_count; - - g_strfreev (namev); - g_strfreev (givenv); - g_strfreev (addv); - g_strfreev (familyv); - - return match_type; -} - -EABContactMatchType -eab_contact_compare_name (EContact *contact1, EContact *contact2) -{ - EContactName *a, *b; - gint matches=0, possible=0; - gboolean given_match = FALSE, additional_match = FALSE, family_match = FALSE; - - g_return_val_if_fail (E_IS_CONTACT (contact1), EAB_CONTACT_MATCH_NOT_APPLICABLE); - g_return_val_if_fail (E_IS_CONTACT (contact2), EAB_CONTACT_MATCH_NOT_APPLICABLE); - - a = e_contact_get (contact1, E_CONTACT_NAME); - b = e_contact_get (contact2, E_CONTACT_NAME); - - if (a == NULL || b == NULL) - return EAB_CONTACT_MATCH_NOT_APPLICABLE; - - if (a->given && b->given) { - ++possible; - if (name_fragment_match_with_synonyms (a->given, b->given, FALSE /* both inputs are complete */)) { - ++matches; - given_match = TRUE; - } - } - - if (a->additional && b->additional) { - ++possible; - if (name_fragment_match_with_synonyms (a->additional, b->additional, FALSE /* both inputs are complete */)) { - ++matches; - additional_match = TRUE; - } - } - - if (a->family && b->family) { - ++possible; - /* We don't allow "loose matching" (i.e. John vs. Jon) on family names */ - if (! e_utf8_casefold_collate (a->family, b->family)) { - ++matches; - family_match = TRUE; - } - } - - e_contact_name_free (a); - e_contact_name_free (b); - - /* Now look at the # of matches and try to intelligently map - an EAB_CONTACT_MATCH_* type to it. Special consideration is given - to family-name matches. */ - - if (possible == 0) - return EAB_CONTACT_MATCH_NOT_APPLICABLE; - - if (possible == 1) - return family_match ? EAB_CONTACT_MATCH_VAGUE : EAB_CONTACT_MATCH_NONE; - - if (possible == matches) - return family_match ? EAB_CONTACT_MATCH_EXACT : EAB_CONTACT_MATCH_PARTIAL; - - if (possible == matches+1) - return family_match ? EAB_CONTACT_MATCH_VAGUE : EAB_CONTACT_MATCH_NONE; - - return EAB_CONTACT_MATCH_NONE; -} - - -/*** Nickname Comparisons ***/ - -EABContactMatchType -eab_contact_compare_nickname (EContact *contact1, EContact *contact2) -{ - g_return_val_if_fail (contact1 && E_IS_CONTACT (contact1), EAB_CONTACT_MATCH_NOT_APPLICABLE); - g_return_val_if_fail (contact2 && E_IS_CONTACT (contact2), EAB_CONTACT_MATCH_NOT_APPLICABLE); - - return EAB_CONTACT_MATCH_NOT_APPLICABLE; -} - - - -/*** E-mail Comparisons ***/ - -static gboolean -match_email_username (const gchar *addr1, const gchar *addr2) -{ - gint c1, c2; - if (addr1 == NULL || addr2 == NULL) - return FALSE; - - while (*addr1 && *addr2 && *addr1 != '@' && *addr2 != '@') { - c1 = isupper (*addr1) ? tolower (*addr1) : *addr1; - c2 = isupper (*addr2) ? tolower (*addr2) : *addr2; - if (c1 != c2) - return FALSE; - ++addr1; - ++addr2; - } - - return *addr1 == *addr2; -} - -static gboolean -match_email_hostname (const gchar *addr1, const gchar *addr2) -{ - gint c1, c2; - gboolean seen_at1, seen_at2; - if (addr1 == NULL || addr2 == NULL) - return FALSE; - - /* Walk to the end of each string. */ - seen_at1 = FALSE; - if (*addr1) { - while (*addr1) { - if (*addr1 == '@') - seen_at1 = TRUE; - ++addr1; - } - --addr1; - } - - seen_at2 = FALSE; - if (*addr2) { - while (*addr2) { - if (*addr2 == '@') - seen_at2 = TRUE; - ++addr2; - } - --addr2; - } - - if (!seen_at1 && !seen_at2) - return TRUE; - if (!seen_at1 || !seen_at2) - return FALSE; - - while (*addr1 != '@' && *addr2 != '@') { - c1 = isupper (*addr1) ? tolower (*addr1) : *addr1; - c2 = isupper (*addr2) ? tolower (*addr2) : *addr2; - if (c1 != c2) - return FALSE; - --addr1; - --addr2; - } - - /* This will match bob@foo.ximian.com and bob@ximian.com */ - return *addr1 == '.' || *addr2 == '.'; -} - -static EABContactMatchType -compare_email_addresses (const gchar *addr1, const gchar *addr2) -{ - if (addr1 == NULL || *addr1 == 0 || - addr2 == NULL || *addr2 == 0) - return EAB_CONTACT_MATCH_NOT_APPLICABLE; - - if (match_email_username (addr1, addr2)) - return match_email_hostname (addr1, addr2) ? EAB_CONTACT_MATCH_EXACT : EAB_CONTACT_MATCH_VAGUE; - - return EAB_CONTACT_MATCH_NONE; -} - -EABContactMatchType -eab_contact_compare_email (EContact *contact1, EContact *contact2) -{ - EABContactMatchType match = EAB_CONTACT_MATCH_NOT_APPLICABLE; - GList *contact1_email, *contact2_email; - GList *i1, *i2; - - g_return_val_if_fail (contact1 && E_IS_CONTACT (contact1), EAB_CONTACT_MATCH_NOT_APPLICABLE); - g_return_val_if_fail (contact2 && E_IS_CONTACT (contact2), EAB_CONTACT_MATCH_NOT_APPLICABLE); - - contact1_email = e_contact_get (contact1, E_CONTACT_EMAIL); - contact2_email = e_contact_get (contact2, E_CONTACT_EMAIL); - - if (contact1_email == NULL || contact2_email == NULL) { - g_list_foreach (contact1_email, (GFunc)g_free, NULL); - g_list_free (contact1_email); - - g_list_foreach (contact2_email, (GFunc)g_free, NULL); - g_list_free (contact2_email); - return EAB_CONTACT_MATCH_NOT_APPLICABLE; - } - - i1 = contact1_email; - - /* Do pairwise-comparisons on all of the e-mail addresses. If - we find an exact match, there is no reason to keep - checking. */ - while (i1 && match != EAB_CONTACT_MATCH_EXACT) { - char *addr1 = (char *) i1->data; - - i2 = contact2_email; - while (i2 && match != EAB_CONTACT_MATCH_EXACT) { - char *addr2 = (char *) i2->data; - - match = combine_comparisons (match, compare_email_addresses (addr1, addr2)); - - i2 = i2->next; - } - - i1 = i1->next; - } - - g_list_foreach (contact1_email, (GFunc)g_free, NULL); - g_list_free (contact1_email); - - g_list_foreach (contact2_email, (GFunc)g_free, NULL); - g_list_free (contact2_email); - - return match; -} - -EABContactMatchType -eab_contact_compare_address (EContact *contact1, EContact *contact2) -{ - g_return_val_if_fail (contact1 && E_IS_CONTACT (contact1), EAB_CONTACT_MATCH_NOT_APPLICABLE); - g_return_val_if_fail (contact2 && E_IS_CONTACT (contact2), EAB_CONTACT_MATCH_NOT_APPLICABLE); - - /* Unimplemented */ - - return EAB_CONTACT_MATCH_NOT_APPLICABLE; -} - -EABContactMatchType -eab_contact_compare_telephone (EContact *contact1, EContact *contact2) -{ - g_return_val_if_fail (contact1 && E_IS_CONTACT (contact1), EAB_CONTACT_MATCH_NOT_APPLICABLE); - g_return_val_if_fail (contact2 && E_IS_CONTACT (contact2), EAB_CONTACT_MATCH_NOT_APPLICABLE); - - /* Unimplemented */ - - return EAB_CONTACT_MATCH_NOT_APPLICABLE; -} - -EABContactMatchType -eab_contact_compare (EContact *contact1, EContact *contact2) -{ - EABContactMatchType result; - - g_return_val_if_fail (contact1 && E_IS_CONTACT (contact1), EAB_CONTACT_MATCH_NOT_APPLICABLE); - g_return_val_if_fail (contact2 && E_IS_CONTACT (contact2), EAB_CONTACT_MATCH_NOT_APPLICABLE); - - result = EAB_CONTACT_MATCH_NONE; - result = combine_comparisons (result, eab_contact_compare_name (contact1, contact2)); - result = combine_comparisons (result, eab_contact_compare_nickname (contact1, contact2)); - result = combine_comparisons (result, eab_contact_compare_email (contact1, contact2)); - result = combine_comparisons (result, eab_contact_compare_address (contact1, contact2)); - result = combine_comparisons (result, eab_contact_compare_telephone (contact1, contact2)); - - return result; -} - -typedef struct _MatchSearchInfo MatchSearchInfo; -struct _MatchSearchInfo { - EContact *contact; - GList *avoid; - EABContactMatchQueryCallback cb; - gpointer closure; -}; - -static void -match_search_info_free (MatchSearchInfo *info) -{ - if (info) { - g_object_unref (info->contact); - - /* This should already have been deallocated, but just in case... */ - if (info->avoid) { - g_list_foreach (info->avoid, (GFunc) g_object_unref, NULL); - g_list_free (info->avoid); - info->avoid = NULL; - } - - g_free (info); - } -} - -static void -query_cb (EBook *book, EBookStatus status, GList *contacts, gpointer closure) -{ - /* XXX we need to free contacts */ - MatchSearchInfo *info = (MatchSearchInfo *) closure; - EABContactMatchType best_match = EAB_CONTACT_MATCH_NONE; - EContact *best_contact = NULL; - GList *remaining_contacts = NULL; - const GList *i; - - if (status != E_BOOK_ERROR_OK) { - info->cb (info->contact, NULL, EAB_CONTACT_MATCH_NONE, info->closure); - match_search_info_free (info); - return; - } - - /* remove the contacts we're to avoid from the list, if they're present */ - for (i = contacts; i != NULL; i = g_list_next (i)) { - EContact *this_contact = E_CONTACT (i->data); - GList *iterator; - gboolean avoid = FALSE; - for (iterator = info->avoid; iterator; iterator = iterator->next) { - if (!strcmp (e_contact_get_const (iterator->data, E_CONTACT_UID), - e_contact_get_const (this_contact, E_CONTACT_UID))) { - avoid = TRUE; - break; - } - } - if (!avoid) - remaining_contacts = g_list_prepend (remaining_contacts, this_contact); - } - - remaining_contacts = g_list_reverse (remaining_contacts); - - for (i = remaining_contacts; i != NULL; i = g_list_next (i)) { - EContact *this_contact = E_CONTACT (i->data); - EABContactMatchType this_match = eab_contact_compare (info->contact, this_contact); - if ((gint)this_match > (gint)best_match) { - best_match = this_match; - best_contact = this_contact; - } - } - - g_list_free (remaining_contacts); - - info->cb (info->contact, best_contact, best_match, info->closure); - match_search_info_free (info); -} - -#define MAX_QUERY_PARTS 10 -static void -use_common_book_cb (EBook *book, gpointer closure) -{ - MatchSearchInfo *info = (MatchSearchInfo *) closure; - EContact *contact = info->contact; - EContactName *contact_name; - GList *contact_email; - gchar *query_parts[MAX_QUERY_PARTS]; - gint p=0; - gchar *query, *qj; - int i; - - if (book == NULL) { - info->cb (info->contact, NULL, EAB_CONTACT_MATCH_NONE, info->closure); - match_search_info_free (info); - return; - } - - contact_name = e_contact_get (contact, E_CONTACT_NAME); - if (contact_name) { - if (contact_name->given && *contact_name->given) - query_parts[p++] = g_strdup_printf ("(contains \"full_name\" \"%s\")", contact_name->given); - - if (contact_name->additional && *contact_name->additional) - query_parts[p++] = g_strdup_printf ("(contains \"full_name\" \"%s\")", contact_name->additional); - - if (contact_name->family && *contact_name->family) - query_parts[p++] = g_strdup_printf ("(contains \"full_name\" \"%s\")", contact_name->family); - - e_contact_name_free (contact_name); - } - - contact_email = e_contact_get (contact, E_CONTACT_EMAIL); - if (contact_email) { - GList *iter; - for (iter = contact_email; iter && p < MAX_QUERY_PARTS; iter = iter->next) { - gchar *addr = g_strdup (iter->data); - if (addr && *addr) { - gchar *s = addr; - while (*s) { - if (*s == '@') { - *s = '\0'; - break; - } - ++s; - } - query_parts[p++] = g_strdup_printf ("(beginswith \"email\" \"%s\")", addr); - g_free (addr); - } - } - } - g_list_foreach (contact_email, (GFunc)g_free, NULL); - g_list_free (contact_email); - - - /* Build up our full query from the parts. */ - query_parts[p] = NULL; - qj = g_strjoinv (" ", query_parts); - for(i = 0; query_parts[i] != NULL; i++) - g_free(query_parts[i]); - if (p > 0) { - query = g_strdup_printf ("(or %s)", qj); - g_free (qj); - } else { - query = qj; - } - - if (query && *query) - e_book_async_get_contacts (book, query, query_cb, info); - else - query_cb (book, E_BOOK_ERROR_OK, NULL, info); - - g_free (query); -} - -void -eab_contact_locate_match (EContact *contact, EABContactMatchQueryCallback cb, gpointer closure) -{ - MatchSearchInfo *info; - - g_return_if_fail (contact && E_IS_CONTACT (contact)); - g_return_if_fail (cb != NULL); - - info = g_new (MatchSearchInfo, 1); - info->contact = contact; - g_object_ref (contact); - info->cb = cb; - info->closure = closure; - info->avoid = NULL; - - addressbook_load_default_book (use_common_book_cb, info); -} - -/** - * e_contact_locate_match_full: - * @book: The book to look in. If this is NULL, use the default - * addressbook. - * @contact: The contact to compare to. - * @avoid: A list of contacts to not match. These will not show up in the search. - * @cb: The function to call. - * @closure: The closure to add to the call. - * - * Look for the best match and return it using the EABContactMatchQueryCallback. - **/ -void -eab_contact_locate_match_full (EBook *book, EContact *contact, GList *avoid, EABContactMatchQueryCallback cb, gpointer closure) -{ - MatchSearchInfo *info; - - g_return_if_fail (contact && E_IS_CONTACT (contact)); - g_return_if_fail (cb != NULL); - - info = g_new (MatchSearchInfo, 1); - info->contact = contact; - g_object_ref (contact); - info->cb = cb; - info->closure = closure; - info->avoid = g_list_copy (avoid); - g_list_foreach (info->avoid, (GFunc) g_object_ref, NULL); - - if (book) - use_common_book_cb (book, info); - else - addressbook_load_default_book (use_common_book_cb, info); -} - |