More work on RFC 2047 encoder.

svn path=/trunk/; revision=1134
author: Robert Brady <rbrady@src.gnome.org> 1999-08-20 01:33:49 +0800
committer: Robert Brady <rbrady@src.gnome.org> 1999-08-20 01:33:49 +0800
commit: 95697142edb4d62c7f82cb915857597f095a0155 (patch)
tree: 648593dfb54b4f6b0623e04e226d8096c03ef935
parent: 6f38fd46e6bed032688514ecc22d6a8d9634d817 (diff)
download: gsoc2013-evolution-95697142edb4d62c7f82cb915857597f095a0155.tar.gz
gsoc2013-evolution-95697142edb4d62c7f82cb915857597f095a0155.tar.zst
gsoc2013-evolution-95697142edb4d62c7f82cb915857597f095a0155.zip
2 files changed, 160 insertions, 16 deletions
diff --git a/ChangeLog b/ChangeLog
index 9d7130ce9a..ea06f9f5d9 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+1999-08-18  Robert Brady  <rwb197@ecs.soton.ac.uk>
+
+	* camel/gmime-rfc2047.c: more work on encoder.
+
 1999-08-17  bertrand  <Bertrand.Guiheneuf@aful.org>
 
 	* camel/camel-stream.c (camel_stream_read): 
diff --git a/camel/gmime-rfc2047.c b/camel/gmime-rfc2047.c
index 72f376d00c..d9ac5f3f9e 100644
--- a/camel/gmime-rfc2047.c
+++ b/camel/gmime-rfc2047.c
@@ -122,7 +122,7 @@ build_base64_rank_table (void)
 }
 
 
-gchar *
+static gchar *
 rfc2047_decode_word (const gchar *data, const gchar *into_what) 
 {
 	const char *charset = strstr (data, "=?"), *encoding, *text, *end;
@@ -161,7 +161,7 @@ rfc2047_decode_word (const gchar *data, const gchar *into_what)
 		char *cook_2 = cooked_data;
 		int cook_len = strlen (cook_2);
 		int b_len = 4096;
-		iconv_t i;
+		unicode_iconv_t i;
 		strncpy (q, charset, c - charset);
 		q[c - charset] = 0;
 		i = unicode_iconv_open (into_what, q);
@@ -237,10 +237,10 @@ gmime_rfc2047_decode (const gchar *data, const gchar *into_what)
 #define isnt_ascii(a) ((a) <= 0x1f || (a) >= 0x7f)
 
 static int 
-rfc2047_clean (const gchar *string) {
-	if (strstr (string, "?=")) return 1;
-	while (*string) {
-		if (!isnt_ascii ((unsigned char)*string))
+rfc2047_clean (const gchar *string, const gchar *max) {
+	//	if (strstr (string, "?=")) return 1;
+	while (string < max) {
+		if (isnt_ascii ((unsigned char)*string))
 			return 0;
 		string++;
 	}
@@ -248,15 +248,17 @@ rfc2047_clean (const gchar *string) {
 }
 
 static gchar *
-encode_word (const gchar *string, const gchar *said_charset) 
+encode_word (const gchar *string, int length, const gchar *said_charset) 
 {
-	if (rfc2047_clean(string)) 
+	const gchar *max = string + length;
+	if (rfc2047_clean(string, max)) {
 		/* don't bother encoding it if it has no odd characters in it */
-		return g_strdup (string);
+		return g_strndup (string, length);
+	}
 	{
-		char *temp = malloc (strlen(string) * 4 + 1), *t = temp;
+		char *temp = malloc (length * 4 + 1), *t = temp;
 		t += sprintf (t, "=?%s?q?", said_charset);
-		while (*string) {
+		while (string < max) {
 			if (*string == ' ')
 				*(t++) = '_';
 			else if ((*string <= 0x1f) || (*string >= 0x7f) || (*string == '=') || (*string == '?')) 
@@ -272,16 +274,112 @@ encode_word (const gchar *string, const gchar *said_charset)
 	}
 }
 
+static int
+words_in(char *a) 
+{
+	int words = 1;
+	while (*a) {
+		if (*(a++)==' ')
+			words++;
+	}
+	return words;
+}
+
+struct word_data {
+	const char *word;
+	int word_length;
+	const char *to_encode_in;
+	char *encoded;
+	enum {
+		wt_None,
+		wt_Address,
+	} type;
+};
+
+static int string_can_fit_in(const char *a, int length, const char *charset) 
+{
+	while (length--) {
+		if (*a < 0x1f || *a >= 0x7f) return 0;
+		a++;
+	}
+	return 1;
+}
+
+static void
+show_entry(struct word_data *a) 
+{
+	a->type = wt_None;
+	
+	if (string_can_fit_in(a->word, a->word_length, "US-ASCII"))
+		a->to_encode_in = "US-ASCII";
+
+	if (a->word[0]=='<' && a->word[a->word_length-1]=='>') {
+		a->type = wt_Address;
+	}
+}
+
+static void
+break_into_words(const char *string, struct word_data *a, int words) 
+{
+	int i;
+	for (i=0;i<words;i++) {
+		
+		char *next_space = strchr(string, ' ');
+
+		if (!next_space) {
+			a[i].word = string;
+			a[i].word_length = strlen(string);
+			a[i].to_encode_in = NULL; /* i.e. the default */
+
+			show_entry(a+i);
+
+			return;
+		}
+
+		a[i].word = string;
+		a[i].word_length = next_space - string;
+		a[i].to_encode_in = NULL;
+
+		show_entry(a+i);
+
+		string = next_space + 1;
+
+	}
+}
+
+static void
+join_words(struct word_data *a, int words)
+{
+	int i;
+	for (i=(words-1);i>0;i--) {
+		if (a[i].to_encode_in == a[i-1].to_encode_in) {
+			a[i-1].word_length += 1 + a[i].word_length;
+			a[i].word = 0;
+			a[i].word_length = 0;
+		}
+
+	}
+}
+
+static void show_words(struct word_data *words, int count) 
+{
+	int i;
+	for (i=0;i<count;i++)
+		if (words[i].word)
+			show_entry(words+i);
+}
+
 gchar *
 gmime_rfc2047_encode (const gchar *string, const gchar *charset) 
 {
-	int temp_len = strlen (string)*4 + 1;
+	int temp_len = strlen (string)*4 + 1, word_count;
 	char *temp = g_malloc (temp_len), *temp_2 = temp;
 	int string_length = strlen (string);
-	char *encoded = NULL;
+	char *encoded = NULL, *p;
+	struct word_data *words;
 
 	/* first, let us convert to UTF-8 */
-	iconv_t i = unicode_iconv_open ("UTF-8", charset);
+	unicode_iconv_t i = unicode_iconv_open ("UTF-8", charset);
 	unicode_iconv (i, &string, &string_length, &temp_2, &temp_len);
 	unicode_iconv_close (i);
 	
@@ -289,8 +387,43 @@ gmime_rfc2047_encode (const gchar *string, const gchar *charset)
 	*temp_2 = 0;
 
 	/* now encode it as if it were a single word */
-	encoded = encode_word (temp, "UTF-8");
 	
+	word_count = words_in ( temp );
+
+        words = g_malloc(sizeof (struct word_data) * word_count);
+	break_into_words(temp, words, word_count);
+	
+	join_words(words, word_count);
+
+	show_words(words, word_count);
+
+	{
+		size_t len = 0;
+		int c = 0;
+		for (c = 0;c<word_count;c++) {
+			if (words[c].word)
+				{
+					words[c].encoded = encode_word(words[c].word, words[c].word_length, 
+							      words[c].to_encode_in ? words[c].to_encode_in :
+							      "UTF-8");
+					len += strlen(words[c].encoded) + 1;
+				}
+		}
+
+		{ 
+		        encoded = g_malloc(len+1);
+			p = encoded;
+			for (c = 0; c < word_count;c++) if (words[c].word) {
+				strcpy(p, words[c].encoded);
+				p += strlen(p);
+				strcpy(p, " ");
+				p++;
+			}
+			*p = 0;
+		}
+	}
+
+
 	/*
 	  
 	  real algorithm :
@@ -339,8 +472,15 @@ gmime_rfc2047_encode (const gchar *string, const gchar *charset)
 	  the text is just in US-ASCII : like 99% of the text that will
 	  pass through it)
 	  
+
+
+	  current status :
+
+	    Algorithm now partially implemented.
+
 	*/
-	
+
+	g_free(words);
         g_free(temp);
 	
 	return encoded;
author	Robert Brady <rbrady@src.gnome.org>	1999-08-20 01:33:49 +0800
committer	Robert Brady <rbrady@src.gnome.org>	1999-08-20 01:33:49 +0800
commit	95697142edb4d62c7f82cb915857597f095a0155 (patch)
tree	648593dfb54b4f6b0623e04e226d8096c03ef935
parent	6f38fd46e6bed032688514ecc22d6a8d9634d817 (diff)
download	gsoc2013-evolution-95697142edb4d62c7f82cb915857597f095a0155.tar.gz gsoc2013-evolution-95697142edb4d62c7f82cb915857597f095a0155.tar.zst gsoc2013-evolution-95697142edb4d62c7f82cb915857597f095a0155.zip