From 2bbd6e4180c90540f5be5c87736099229d234d60 Mon Sep 17 00:00:00 2001
From: Robert Brady <rwb197@ecs.soton.ac.uk>
Date: Fri, 6 Aug 1999 14:25:09 +0000
Subject: test for RFC2047 decoder.

1999-08-06  Robert Brady  <rwb197@ecs.soton.ac.uk>

	* tests/test5.c: test for RFC2047 decoder.

	* camel/gmime-rfc2047.c: Improved RFC2047 decoder.

svn path=/trunk/; revision=1088
---
 ChangeLog             |   6 ++
 camel/gmime-rfc2047.c | 179 +++++++++++++++++++++++++++-----------------------
 tests/.cvsignore      |   5 ++
 tests/test5.c         |  60 +++++++++++++++++
 4 files changed, 168 insertions(+), 82 deletions(-)
 create mode 100644 tests/test5.c

diff --git a/ChangeLog b/ChangeLog
index e067b02f92..f65242e705 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+1998-08-06  Robert Brady  <rwb197@ecs.soton.ac.uk>
+	
+	* tests/test5.c: test for RFC2047 decoder.
+
+	* camel/gmime-rfc2047.c: Improved RFC2047 decoder.
+
 1999-08-06  bertrand  <Bertrand.Guiheneuf@aful.org>
 
 	* camel/providers/MH/camel-mh-folder.c (_exists): add debug information
diff --git a/camel/gmime-rfc2047.c b/camel/gmime-rfc2047.c
index 7266cb1159..2a44a20dd6 100644
--- a/camel/gmime-rfc2047.c
+++ b/camel/gmime-rfc2047.c
@@ -49,10 +49,11 @@ hexval (gchar c) {
 	return c - 'a' + 10;
 }
 
-static void 
-decode_quoted (const gchar *text, gchar *to) 
-{
-	while (*text) {
+static gchar *
+decode_quoted(const gchar *text, const gchar *end) {
+	gchar *to = malloc(end - text + 1), *to_2 = to;
+        if (!to) return NULL;
+	while (*text && text < end) {
 		if (*text == '=') {
 			gchar a = hexval (text[1]);
 			gchar b = hexval (text[2]);
@@ -70,20 +71,23 @@ decode_quoted (const gchar *text, gchar *to)
 			text++;
 		}
 	}
-	*to = 0;
+	return to_2;
 }
 
-static void 
-decode_base64 (const gchar *what, gchar *where) 
-{
+static gchar *
+decode_base64(const gchar *data, const gchar *end) {
 	unsigned short pattern = 0;
 	int bits = 0;
 	int delimiter = '=';
 	gchar x;
-	gchar *t = where;
+	gchar *buffer = g_malloc((end - data) * 3);
+	gchar *t = buffer;
 	int Q = 0;
-	while (*what != delimiter) {
-		x = base64_rank[(unsigned char)(*what++)];
+
+	if (!buffer) return NULL;
+
+	while (*data != delimiter) {
+		x = base64_rank[(unsigned char)(*data++)];
 		if (x == NOT_RANKED)
 			continue;
 		pattern <<= 6;
@@ -97,6 +101,7 @@ decode_base64 (const gchar *what, gchar *where)
 		}
 	}
 	*t = 0;
+	return buffer;
 }
 
 static void
@@ -113,86 +118,96 @@ build_base64_rank_table (void)
 	}
 }
 
-gchar 
-*gmime_rfc2047_decode (const gchar *data, const gchar *into_what) 
+
+gchar*
+rfc2047_decode_word (const gchar *data, const gchar *into_what) 
 {
-	gchar buffer[4096] /* FIXME : constant sized buffer */, *b = buffer;
+	const char *charset = strstr(data, "=?"), *encoding, *text, *end;
+
+	char *buffer, *b, *cooked_data;
 	
+	buffer = g_malloc(strlen(data) * 2);
+	b = buffer;
+
+	if (!charset) return strdup(data);
+	charset+=2;
+
+	encoding = strchr(charset, '?');
+	if (!encoding) return strdup(data);
+	encoding++;
+
+	text = strchr(encoding, '?');
+	if (!text) return strdup(data);
+	text++;
+
+	end = strstr(text, "?=");
+	if (!end) return strdup(data);
+
+	b[0] = 0;
+
+	if (toupper(*encoding)=='Q')
+		cooked_data = decode_quoted(text, end);
+	else if (toupper(*encoding)=='B')
+		cooked_data = decode_base64(text, end);
+	else
+		return g_strdup(data);
+
+	{
+		char *c = strchr(charset, '?');
+		char *q = g_malloc(c - charset + 1);
+		char *cook_2 = cooked_data;
+		int cook_len = strlen(cook_2);
+		int b_len = 4096;
+		iconv_t i;
+		strncpy(q, charset, c - charset);
+		i = unicode_iconv_open(into_what, q);
+		if (!i) {
+			g_free(q);
+			return g_strdup(buffer);
+		}
+		unicode_iconv(i, &cook_2, &cook_len, &b, &b_len);
+		unicode_iconv_close(i);
+	}
+
+	return g_strdup(buffer);
+}
+
+gchar *
+gmime_rfc2047_decode (const gchar *data, const gchar *into_what) 
+{
+	char *buffer = malloc(strlen(data) * 4), *b = buffer;
+
+	int was_encoded_word = 0;
+
 	build_base64_rank_table ();
-	
-	while (*data) {
-		
-		/* If we encounter an error we just break out of the loop and copy the rest
-		 * of the text as-is */
-		
-		if (*data=='=') {
-			data++;
-			if (*data=='?') {
-				gchar *charset, *encoding, *text, *end;
-				gchar dc[4096];
-				charset = data+1;
-				encoding = strchr (charset, '?');
-				
-				if (!encoding) break;
-				encoding++;
-				text = strchr (encoding, '?');
-				if (!text) break;
-				text++;
-				end = strstr (text, "?=");
-				if (!end) break;
-				end++;
-				
-				*(encoding-1)=0;
-				*(text-1)=0;
-				*(end-1)=0;
-				
-				if (strcasecmp (encoding, "q") == 0) {
-					decode_quoted(text, dc);
-				} else if (strcasecmp (encoding, "b") == 0) {
-					decode_base64 (text, dc);
-				} else {
-					/* What to do here? */
-					break;
-				}
-				
-				{
-					int f;
-					iconv_t i;
-					const gchar *d2 = dc;
-					int l = strlen (d2), l2 = 4000;
-					
-					i = unicode_iconv_open (into_what, charset);
-					if (!i) 
-						break;
-					
-					unicode_iconv (i, &d2, &l, &b, &l2);
-					
-					unicode_iconv_close (i);
-					data = end;
-				}
+
+	while (data && *data) {
+		char *word_start = strstr(data, "=?"), *decoded;
+		if (!word_start) {
+			strcpy(b, data);
+			return buffer;
+		}
+		if (word_start != data) {
+
+			if (strspn(data, " \t\n\r") != (word_start - data)) {
+				strncpy(b, data, word_start - data);
+				b += word_start - data;
 			}
-		} else {
-			*b = *data;
-			b++;
 		}
-		
-		data++;
-		
-	}
-	
-	while (*data) {
-		*b = *data;
-		b++;
-		data++;
+		decoded = rfc2047_decode_word(word_start, into_what);
+		strcpy(b, decoded);
+		b += strlen(decoded);
+		g_free(decoded);
+
+		data = strstr(data, "?=") + 2;
 	}
-	
+
 	*b = 0;
-	
-	return g_strdup (buffer);
+	return buffer;
 }
 
 gchar 
-*rfc2047_encode (const gchar *string, const gchar *charset) 
+*gmime_rfc2047_encode (const gchar *string, const gchar *charset) 
 {
 	gchar buffer[4096] /* FIXME : constant sized buffer */;
 	gchar *b = buffer;
@@ -213,7 +228,7 @@ gchar
 		while (*s) {
 			if (*s == ' ') b += sprintf (b, "_");
 			else if (*s < 0x20 || *s >= 0x7f || *s == '=' || *s == '?' || *s == '_') {
-				b += sprintf (b, "=%2x", *s);
+				b += sprintf (b, "=%2x", (unsigned char)*s);
 			} else {
 				b += sprintf (b, "%c", *s);
 			}
diff --git a/tests/.cvsignore b/tests/.cvsignore
index 8cb5c0556b..7232b242dc 100644
--- a/tests/.cvsignore
+++ b/tests/.cvsignore
@@ -1,3 +1,8 @@
+test1
+test2
+test3
+test4
+test5
 Makefile.in
 Makefile
 .deps
diff --git a/tests/test5.c b/tests/test5.c
new file mode 100644
index 0000000000..2f47fda418
--- /dev/null
+++ b/tests/test5.c
@@ -0,0 +1,60 @@
+/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+
+/* test for the RFC 2047 decoder */
+
+#include <string.h>
+#include <unicode.h>
+
+#include "gmime-utils.h"
+#include "stdio.h"
+#include "camel-log.h"
+#include "camel-mime-message.h"
+#include "camel-mime-part.h"
+#include "camel-stream.h"
+#include "camel-stream-fs.h"
+#include "camel.h"
+#include "gmime-rfc2047.h"
+
+#define TERMINAL_CHARSET "UTF-8"
+
+/* 
+ * Info on many unicode issues, including, utf-8 xterms from :
+ * 
+ *   http://www.cl.cam.ac.uk/~mgk/unicode.html
+ *
+ */
+
+const char *tests[] = 
+{ 
+/* these strings come from RFC 2047. Ought to add a few torture cases here. */
+  "=?US-ASCII?Q?Keith_Moore?= <moore@cs.utk.edu>",
+  "=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= <keld@dkuug.dk>",
+  "=?ISO-8859-1?Q?Andr=E9?= Pirard <PIRARD@vm1.ulg.ac.be>",
+  "=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?= =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=",
+  "=?ISO-8859-1?Q?Olle_J=E4rnefors?= <ojarnef@admin.kth.se>",
+  "=?ISO-8859-1?Q?Patrik_F=E4ltstr=F6m?= <paf@nada.kth.se>",
+  "Nathaniel Borenstein <nsb@thumper.bellcore.com> (=?iso-8859-8?b?7eXs+SDv4SDp7Oj08A==?=)",
+  "",
+  "(=?ISO-8859-1?Q?a?=)",     /* should be displayed as           (a)   */
+  "(=?ISO-8859-1?Q?a?= b)",                                  /*   (a b) */
+  "(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)",                 /*   (ab)  */
+  "(=?ISO-8859-1?Q?a?=  =?ISO-8859-1?Q?b?=)",                /*   (ab)  */
+  "(=?ISO-8859-1?Q?a?= \n=?ISO-8859-1?Q?b?=)",               /*   (ab)  */
+  "(=?ISO-8859-1?Q?a_b?=)",                                  /*   (a b) */
+  "(=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=)",                /*   (ab)  */
+  NULL
+};
+  
+
+int
+main (int argc, char**argv)
+{      
+	const char **b = tests;
+	while (*b) {
+		printf("%s\n", gmime_rfc2047_decode(*b, TERMINAL_CHARSET));
+		b++;
+	}
+
+	return 0;
+
+}
-- 
cgit