diff options
-rw-r--r-- | camel/ChangeLog | 40 | ||||
-rw-r--r-- | camel/Makefile.am | 5 | ||||
-rw-r--r-- | camel/camel-internet-address.c | 42 | ||||
-rw-r--r-- | camel/camel-mime-part.c | 8 | ||||
-rw-r--r-- | camel/camel-mime-utils.c | 228 | ||||
-rw-r--r-- | camel/camel-mime-utils.h | 5 | ||||
-rw-r--r-- | camel/providers/smtp/camel-smtp-transport.c | 15 |
7 files changed, 266 insertions, 77 deletions
diff --git a/camel/ChangeLog b/camel/ChangeLog index c9f3e2bf80..2867f7e9b6 100644 --- a/camel/ChangeLog +++ b/camel/ChangeLog @@ -1,5 +1,45 @@ 2000-12-11 Not Zed <NotZed@HelixCode.com> + * providers/smtp/camel-smtp-transport.c (smtp_data): Remove use of + linewrap filter. Headers are now wrapped. encode_8bit already + enforces a 998 octet line limit. + (smtp_data): Also fixed a memleak, we always have to unref our own + copy of the filters. We also dont need to remove them manually, + so dont bother. The type's an int too ... + + * camel-internet-address.c (internet_unformat): When scanning past + quotes, remove them also. + (camel_internet_address_format_address): If the name contains "'s, + or ','s then strip any quotes and wrap the whole lot in one set of + quotes. + + * Makefile.am (noinst_HEADERS): We dont want to install + camel-charset-map-private.h, ever. There are probably other + similar files ..? + + * camel-mime-part.c (write_to_stream): Fold header lines + appropriately as we're writing them out. + + * camel-mime-utils.c (header_fold): Add a new argument, headerlen, + tells it how long the associated header token is. + (header_fold): Also, check to see if we need to fold first, using + a better algorithm, and also accept already-folded lines, and + re-process accordingly. + (rfc2047_decode_word): Add a little buffer space to iconv output + for shifting overheads? + (rfc2047_decode_word): finish the iconv with a null call, to flush + shift state, etc. + (rfc2047_encode_word): Attempt to break up long words into + appropriately sized, independent, chunks. See rfc2047, section 2. + (header_decode_mailbox): Dont add in extra spaces into the output + if we are decoding adjacent encoded words. We can only guess this + case, as some broken mailers put encoded words inside quoted + words. + (header_encode_phrase): Dont merge words if they are going to end + up too long. Also change back to only merge consecutive words of + the same type. e.g. 'foo. 
blah fum.' -> "foo." blah "fum." or + 'iam an. idiot' -> iam "an." idiot + * camel-medium.c (camel_medium_set_header): Hrm, we actually want to call set_header, not add_header here, probably explains some duplicate X-Evolution headers i was trying to track down. Also diff --git a/camel/Makefile.am b/camel/Makefile.am index cb2924e467..bff90e3d89 100644 --- a/camel/Makefile.am +++ b/camel/Makefile.am @@ -71,7 +71,6 @@ libcamelinclude_HEADERS = \ broken-date-parser.h \ camel-address.h \ camel-charset-map.h \ - camel-charset-map-private.h \ camel-data-wrapper.h \ camel-exception-list.def \ camel-exception.h \ @@ -126,6 +125,10 @@ libcamel_la_LDFLAGS = -version-info 0:0:0 -rpath $(libdir) libcamel_la_LIBADD = $(top_builddir)/e-util/libeutil.la $(UNICODE_LIBS) + +noinst_HEADERS = \ + camel-charset-map-private.h + EXTRA_DIST = \ README diff --git a/camel/camel-internet-address.c b/camel/camel-internet-address.c index 1bcd532625..32e383c093 100644 --- a/camel/camel-internet-address.c +++ b/camel/camel-internet-address.c @@ -162,10 +162,16 @@ internet_unformat(CamelAddress *a, const char *raw) do { c = (unsigned char)*p++; switch (c) { - /* HMMM. Not sure we need this, we dont quote the names anyway ... */ + /* removes quotes, they should only be around the total name anyway */ case '"': - while (*p && *p != '"') - p++; + p[-1] = ' '; + while (*p) + if (*p == '"') { + *p++ = ' '; + break; + } else { + p++; + } break; case '<': if (name == NULL) @@ -186,7 +192,7 @@ internet_unformat(CamelAddress *a, const char *raw) name = g_strstrip(name); addr = g_strstrip(addr); if (addr[0]) { - d(printf("found address: %s <%s>\n", name, addr)); + d(printf("found address: '%s' <%s>\n", name, addr)); camel_internet_address_add((CamelInternetAddress *)a, name, addr); } name = NULL; @@ -419,8 +425,8 @@ camel_internet_address_encode_address(const char *real, const char *addr) /** * camel_internet_address_format_address: - * @name: - * @addr: + * @name: A name, quotes may be stripped from it. 
+ * @addr: Assumes a valid rfc822 email address. * * Function to format a single address, suitable for display. * @@ -433,11 +439,27 @@ camel_internet_address_format_address(const char *name, const char *addr) g_assert(addr); -#warning "If name contains a quote, then we're thrown for six ... " - if (name && name[0]) + if (name && name[0]) { + const char *p = name; + char *o, c; + + while ((c = *p++)) { + if (c == '\"' || c == ',') { + o = ret = g_malloc(strlen(name)+3+strlen(addr)+3 + 1); + p = name; + *o++ = '\"'; + while ((c = *p++)) + if (c != '\"') + *o++ = c; + *o++ = '\"'; + sprintf(o, " <%s>", addr); + d(printf("encoded '%s' => '%s'\n", name, ret)); + return ret; + } + } ret = g_strdup_printf("%s <%s>", name, addr); - else - ret = g_strdup_printf("%s", addr); + } else + ret = g_strdup(addr); return ret; } diff --git a/camel/camel-mime-part.c b/camel/camel-mime-part.c index 86ac9c0b13..e553e60257 100644 --- a/camel/camel-mime-part.c +++ b/camel/camel-mime-part.c @@ -494,13 +494,17 @@ write_to_stream(CamelDataWrapper *data_wrapper, CamelStream *stream) if (mp->headers) { struct _header_raw *h = mp->headers; + char *val; while (h) { - if (h->value == NULL){ + val = h->value; + if (val == NULL) { g_warning("h->value is NULL here for %s", h->name); count = 0; } else { - count = camel_stream_printf(stream, "%s%s%s\n", h->name, isspace(h->value[0]) ? ":" : ": ", h->value); + val = header_fold(val, strlen(h->name)); + count = camel_stream_printf(stream, "%s%s%s\n", h->name, isspace(val[0]) ? 
":" : ": ", val); + g_free(val); } if (count == -1) return -1; diff --git a/camel/camel-mime-utils.c b/camel/camel-mime-utils.c index ccdd03634e..425c77d35d 100644 --- a/camel/camel-mime-utils.c +++ b/camel/camel-mime-utils.c @@ -877,7 +877,7 @@ rfc2047_decode_word(const char *in, int len) int inlen, outlen; iconv_t ic; - d(printf("decoding '%.*s'\n", len, in)); + d(printf("rfc2047: decoding '%.*s'\n", len, in)); /* just make sure we're not passed shit */ if (len<7 @@ -916,7 +916,7 @@ rfc2047_decode_word(const char *in, int len) inbuf = decword; - outlen = inlen*6; + outlen = inlen*6+16; outbase = alloca(outlen); outbuf = outbase; @@ -924,11 +924,12 @@ rfc2047_decode_word(const char *in, int len) ic = iconv_open("UTF-8", encname); if (ic != (iconv_t)-1) { ret = iconv(ic, (const char **)&inbuf, &inlen, &outbuf, &outlen); - iconv_close(ic); if (ret>=0) { + iconv(ic, NULL, 0, &outbuf, &outlen); *outbuf = 0; decoded = g_strdup(outbase); } + iconv_close(ic); } else { w(g_warning("Cannot decode charset, header display may be corrupt: %s: %s", encname, strerror(errno))); /* TODO: Should this do this, or just leave the encoded strings? */ @@ -1095,46 +1096,109 @@ header_decode_string(const char *in) return header_decode_text(in, strlen(in)); } +/* how long a sequence of pre-encoded words should be less than, to attempt to + fit into a properly folded word. Only a guide. */ +#define CAMEL_FOLD_PREENCODED (24) + /* FIXME: needs a way to cache iconv opens for different charsets? 
*/ static void rfc2047_encode_word(GString *outstring, const char *in, int len, const char *type, unsigned short safemask) { - iconv_t ic; + iconv_t ic = (iconv_t *)-1; char *buffer, *out, *ascii; - size_t inlen, outlen, enclen; + size_t inlen, outlen, enclen, bufflen; + const char *inptr, *p; + int first = 1; - d(printf("Converting '%.*s' to %s\n", len, in, type)); + d(printf("Converting [%d] '%.*s' to %s\n", len, len, in, type)); /* convert utf8->encoding */ - outlen = len*6; - buffer = alloca(outlen); + bufflen = len*6+16; + buffer = alloca(bufflen); inlen = len; - out = buffer; - - /* if we can't convert from utf-8, just encode as utf-8 */ - if (!strcasecmp(type, "UTF-8") - || (ic = iconv_open(type, "UTF-8")) == (iconv_t)-1) { - memcpy(buffer, in, len); - out = buffer+len; - type = "UTF-8"; - } else { - if (iconv(ic, &in, &inlen, &out, &outlen) == -1) { - w(g_warning("Conversion problem: conversion truncated: %s", strerror(errno))); + inptr = in; + + ascii = alloca(bufflen); + + if (strcasecmp(type, "UTF-8") != 0) + ic = iconv_open(type, "UTF-8"); + + while (inlen) { + int convlen, i, proclen; + + /* break up words into smaller bits, what we really want is encoded + overhead < 75, + but we'll just guess what that means in terms of input chars, and assume its good enough */ + + out = buffer; + outlen = bufflen; + + if (ic == (iconv_t) -1) { + /* native encoding case, the easy one (?) 
*/ + /* we work out how much we can convert, and still be in length */ + /* proclen will be the result of input characters that we can convert, to the nearest + (approximated) valid utf8 char */ + convlen = 0; + proclen = 0; + p = inptr; + i = 0; + while (p < (in+len) && convlen < (75 - strlen("=?utf-8?q??="))) { + unsigned char c = *p++; + + if (c >= 0xc0) + proclen = i; + i++; + if (c < 0x80) + proclen = i; + if (camel_mime_special_table[c] & safemask) + convlen += 1; + else + convlen += 3; + } + /* well, we probably have broken utf8, just copy it anyway what the heck */ + if (proclen == 0) { + w(g_warning("Appear to have truncated utf8 sequence")); + proclen = inlen; + } + memcpy(out, inptr, proclen); + inptr += proclen; + inlen -= proclen; + out += proclen; + } else { + /* well we could do similar, but we can't (without undue effort), we'll just break it up into + hopefully-small-enough chunks, and leave it at that */ + convlen = MIN(inlen, CAMEL_FOLD_PREENCODED); + p = inptr; + if (iconv(ic, &inptr, &convlen, &out, &outlen) == -1) { + w(g_warning("Conversion problem: conversion truncated: %s", strerror(errno))); + /* blah, we include it anyway, better than infinite loop ... */ + inptr = p + convlen; + } else { + /* make sure we flush out any shift state */ + iconv(ic, NULL, 0, &out, &outlen); + } + inlen -= (inptr - p); } - iconv_close(ic); - } - enclen = out-buffer; - /* now create qp version */ - ascii = alloca(enclen*3 + strlen(type) + 8); - out = ascii; - /* should determine which encoding is smaller, and use that? 
*/ - out += sprintf(out, "=?%s?Q?", type); - out += quoted_encode(buffer, enclen, out, safemask); - sprintf(out, "?="); + enclen = out-buffer; + + /* create token */ + out = ascii; + if (first) + first = 0; + else + *out++ = ' '; + out += sprintf(out, "=?%s?Q?", type); + out += quoted_encode(buffer, enclen, out, safemask); + sprintf(out, "?="); + + d(printf("converted part = %s\n", ascii)); - d(printf("converted = %s\n", ascii)); - g_string_append(outstring, ascii); + g_string_append(outstring, ascii); + } + + if (ic == (iconv_t) -1) { + iconv_close(ic); + } } @@ -1162,7 +1226,6 @@ header_encode_string(const unsigned char *in) /* This gets each word out of the input, and checks to see what charset can be used to encode it. */ /* TODO: Work out when to merge subsequent words, or across word-parts */ - /* FIXME: Make sure a converted word is less than the encoding size */ out = g_string_new(""); inptr = in; encoding = 0; @@ -1275,6 +1338,20 @@ header_encode_phrase(const unsigned char *in) out = g_string_new(""); +#if 0 + { + int i; + + printf("encoding phrase: %s\n", in); + for (i=0;in[i];i++) { + printf(" %02x", in[i]); + if (((i) & 15) == 15) + printf("\n"); + } + printf("\n"); + } +#endif + /* break the input into words */ type = WORD_ATOM; count = 0; @@ -1338,12 +1415,18 @@ header_encode_phrase(const unsigned char *in) nextl = g_list_next(wordl); while (nextl) { next = nextl->data; - /* merge nodes of the same (or lower?) 
type*/ - if (word->type == next->type || (next->type < word->type && word->type < WORD_2047) ) { - word->end = next->end; - words = g_list_remove_link(words, nextl); - g_free(next); - nextl = g_list_next(wordl); + /* merge nodes of the same type AND we are not creating too long a string */ + if (word->type == next->type) { + if (next->end - word->start < CAMEL_FOLD_PREENCODED) { + word->end = next->end; + words = g_list_remove_link(words, nextl); + g_free(next); + nextl = g_list_next(wordl); + } else { + /* if it is going to be too long, make sure we include the separating whitespace */ + word->end = next->start; + break; + } } else { break; } @@ -1377,7 +1460,12 @@ header_encode_phrase(const unsigned char *in) if (nextl) { int i; next = nextl->data; - for (i=next->start-word->end;i>0;i--) + /* if they are adjacent, it means we already had the spaces encoded internally, + so now we just need to output 1 space */ + i=next->start-word->end; + if (i==0) + i=1; + for (;i>0;i--) out = g_string_append_c(out, ' '); } @@ -1822,17 +1910,27 @@ header_decode_mailbox(const char **in) /* ',' and '\0' required incase it is a simple address, no @ domain part (buggy writer) */ name = g_string_new(""); while (pre) { - char *text; + char *text, *last; - /* perform internationalised decoding, and appent */ + /* perform internationalised decoding, and append */ text = header_decode_string(pre); name = g_string_append(name, text); - g_free(pre); + last = pre; g_free(text); pre = header_decode_word(&inptr); - if (pre) - name = g_string_append_c(name, ' '); + if (pre) { + int l = strlen(last); + int p = strlen(pre); + /* dont append ' ' between sucsessive encoded words */ + if ((l>6 && last[l-2] == '?' 
&& last[l-1] == '=') + && (p>6 && pre[0] == '=' && pre[1] == '?')) { + /* dont append ' ' */ + } else { + name = g_string_append_c(name, ' '); + } + } + g_free(last); } header_decode_lwsp(&inptr); if (*inptr == '<') { @@ -2999,21 +3097,45 @@ header_address_list_format(struct _header_address *a) } /* simple header folding */ -/* note: assumes the input has not already been folded */ +/* will work even if the header is already folded */ char * -header_fold(const char *in) +header_fold(const char *in, int headerlen) { int len, outlen, i; - const char *inptr = in, *space; + const char *inptr = in, *space, *p, *n; GString *out; char *ret; + int needunfold = FALSE; + + if (in == NULL) + return NULL; - len = strlen(in); - if (len <= CAMEL_FOLD_SIZE) + /* first, check to see if we even need to fold */ + len = headerlen + 2; + p = in; + while (*p) { + n = strchr(p, '\n'); + if (n == NULL) { + n = p+strlen(p); + } else { + needunfold = TRUE; + } + len += n-p; + + if (len >= CAMEL_FOLD_SIZE) + break; + len = 0; + p = n; + } + if (len < CAMEL_FOLD_SIZE) return g_strdup(in); + /* we need to fold, so first unfold (if we need to), then process */ + if (needunfold) + inptr = in = header_unfold(in); + out = g_string_new(""); - outlen = 0; + outlen = headerlen+2; while (*inptr) { space = strchr(inptr, ' '); if (space) { @@ -3021,7 +3143,9 @@ header_fold(const char *in) } else { len = strlen(inptr); } + printf("next word '%.*s'\n", len, inptr); if (outlen + len > CAMEL_FOLD_SIZE) { + printf("outlen = %d wordlen = %d\n", outlen, len); g_string_append(out, "\n\t"); outlen = 1; /* check for very long words, just cut them up */ @@ -3042,6 +3166,10 @@ header_fold(const char *in) } ret = out->str; g_string_free(out, FALSE); + + if (needunfold) + g_free((char *)in); + return ret; } diff --git a/camel/camel-mime-utils.h b/camel/camel-mime-utils.h index 9bcdb063ac..119eda9626 100644 --- a/camel/camel-mime-utils.h +++ b/camel/camel-mime-utils.h @@ -26,7 +26,7 @@ #include <time.h> /* maximum 
size of a line from header_fold() */ -#define CAMEL_FOLD_SIZE (72) +#define CAMEL_FOLD_SIZE (77) /* a list of references for this message */ struct _header_references { @@ -138,10 +138,11 @@ const char *header_raw_find(struct _header_raw **list, const char *name, int *of const char *header_raw_find_next(struct _header_raw **list, const char *name, int *ofset, const char *last); void header_raw_replace(struct _header_raw **list, const char *name, const char *value, int offset); void header_raw_remove(struct _header_raw **list, const char *name); +void header_raw_fold(struct _header_raw **list); void header_raw_clear(struct _header_raw **list); /* fold a header */ -char *header_fold(const char *in); +char *header_fold(const char *in, int headerlen); char *header_unfold(const char *in); /* decode a header which is a simple token */ diff --git a/camel/providers/smtp/camel-smtp-transport.c b/camel/providers/smtp/camel-smtp-transport.c index a4115c60e8..2d20cadce8 100644 --- a/camel/providers/smtp/camel-smtp-transport.c +++ b/camel/providers/smtp/camel-smtp-transport.c @@ -599,13 +599,11 @@ smtp_data (CamelSmtpTransport *transport, CamelMedium *message, gboolean has_8bi /* now we can actually send what's important :p */ gchar *cmdbuf, *respbuf = NULL; CamelStreamFilter *filtered_stream; - CamelMimeFilter *crlffilter, *lwfilter; - gint crlfid, lwid; - + CamelMimeFilter *crlffilter; /* if the message contains 8bit mime parts and the server doesn't support it, encode 8bit parts to the best - encoding. */ + encoding. 
This will also enforce an encoding to keep the lines in limit */ if (has_8bit_parts && !CAMEL_TRANSPORT (transport)->supports_8bit) camel_mime_message_encode_8bit_parts (CAMEL_MIME_MESSAGE (message)); @@ -641,12 +639,9 @@ smtp_data (CamelSmtpTransport *transport, CamelMedium *message, gboolean has_8bi respbuf = NULL; /* setup stream filtering */ - lwfilter = camel_mime_filter_linewrap_new (998, 998, '\t'); crlffilter = camel_mime_filter_crlf_new (CAMEL_MIME_FILTER_CRLF_ENCODE, CAMEL_MIME_FILTER_CRLF_MODE_CRLF_DOTS); - filtered_stream = camel_stream_filter_new_with_stream (transport->ostream); - lwid = camel_stream_filter_add (filtered_stream, CAMEL_MIME_FILTER (lwfilter)); - crlfid = camel_stream_filter_add (filtered_stream, CAMEL_MIME_FILTER (crlffilter)); + camel_stream_filter_add (filtered_stream, CAMEL_MIME_FILTER (crlffilter)); if (camel_data_wrapper_write_to_stream (CAMEL_DATA_WRAPPER (message), CAMEL_STREAM (filtered_stream)) == -1) { camel_exception_setv (ex, CAMEL_EXCEPTION_SYSTEM, @@ -654,15 +649,11 @@ smtp_data (CamelSmtpTransport *transport, CamelMedium *message, gboolean has_8bi "%s: mail not sent"), g_strerror (errno)); - camel_stream_filter_remove (filtered_stream, lwid); - camel_stream_filter_remove (filtered_stream, crlfid); camel_object_unref (CAMEL_OBJECT (filtered_stream)); return FALSE; } - camel_stream_filter_remove (filtered_stream, lwid); - camel_stream_filter_remove (filtered_stream, crlfid); camel_stream_flush (CAMEL_STREAM (filtered_stream)); camel_object_unref (CAMEL_OBJECT (filtered_stream)); |