diff options
author | delphij <delphij@FreeBSD.org> | 2006-02-26 04:10:15 +0800 |
---|---|---|
committer | delphij <delphij@FreeBSD.org> | 2006-02-26 04:10:15 +0800 |
commit | 37fdafc4de8bab07887a26efc7a5e8505598607f (patch) | |
tree | ccf769d31aaf680aaa3e04532c603baeb148d6c7 /chinese/docproj | |
parent | 8bc8c24c3be8f43107e2d8829a6c24797ae40cac (diff) | |
download | freebsd-ports-graphics-37fdafc4de8bab07887a26efc7a5e8505598607f.tar.gz freebsd-ports-graphics-37fdafc4de8bab07887a26efc7a5e8505598607f.tar.zst freebsd-ports-graphics-37fdafc4de8bab07887a26efc7a5e8505598607f.zip |
Add zh-docproj, a set of supporting tools that help build PDFs
written in Far Eastern languages (GB2312, GBK, BIG5, EUCJP, EUCKR and
UTF-8 encodings).
The code was written by intron <intron at intron dot ac>
Obtained from: The FreeBSD Simplified Chinese Project CVS
Approved by: portmgr (marcus)
Diffstat (limited to 'chinese/docproj')
-rw-r--r-- | chinese/docproj/Makefile | 36 | ||||
-rw-r--r-- | chinese/docproj/pkg-descr | 4 | ||||
-rw-r--r-- | chinese/docproj/pkg-plist | 2 | ||||
-rw-r--r-- | chinese/docproj/src/Makefile | 5 | ||||
-rw-r--r-- | chinese/docproj/src/cjktexsty/Makefile | 13 | ||||
-rw-r--r-- | chinese/docproj/src/cjktexsty/cjktexsty.l | 349 | ||||
-rw-r--r-- | chinese/docproj/src/fixrtf/Makefile | 13 | ||||
-rw-r--r-- | chinese/docproj/src/fixrtf/fixrtf.l | 527 |
8 files changed, 949 insertions, 0 deletions
diff --git a/chinese/docproj/Makefile b/chinese/docproj/Makefile new file mode 100644 index 00000000000..360dd6b87c4 --- /dev/null +++ b/chinese/docproj/Makefile @@ -0,0 +1,36 @@ +# New ports collection makefile for: zh-docproj +# Date created: 26 Feb 2006 +# Whom: Xin LI <delphij@FreeBSD.org> +# +# $FreeBSD$ +# +# This port is self contained in the src directory. +# + +PORTNAME= docproj +PORTVERSION= 0.1.20060226 +CATEGORIES= chinese +MASTER_SITES= # none +PKGNAMEPREFIX= zh- +DISTFILES= # none + +# Note: Updates from intron@intron.ac should also be +# considered as maintainer updates. + +MAINTAINER= delphij@FreeBSD.org +COMMENT= Supportive tools for Chinese docproj build + +LIB_DEPENDS= iconv:${PORTSDIR}/converters/libiconv \ + png.5:${PORTSDIR}/graphics/png + +WRKSRC= ${WRKDIR}/src + +SRC= ${.CURDIR}/src + +do-fetch: + @${DO_NADA} + +pre-patch: + @${CP} -R ${SRC} ${WRKDIR} + +.include <bsd.port.mk> diff --git a/chinese/docproj/pkg-descr b/chinese/docproj/pkg-descr new file mode 100644 index 00000000000..249bbc393af --- /dev/null +++ b/chinese/docproj/pkg-descr @@ -0,0 +1,4 @@ +zh-docproj is a set of utilities that is used to build docproj PDFs. +This utility is maintained by the FreeBSD Simplified Chinese Project. 
+ +WWW: http://www.freebsd.org.cn diff --git a/chinese/docproj/pkg-plist b/chinese/docproj/pkg-plist new file mode 100644 index 00000000000..d8d167d1c46 --- /dev/null +++ b/chinese/docproj/pkg-plist @@ -0,0 +1,2 @@ +bin/cjktexsty +bin/fixrtf diff --git a/chinese/docproj/src/Makefile b/chinese/docproj/src/Makefile new file mode 100644 index 00000000000..1fc7625da89 --- /dev/null +++ b/chinese/docproj/src/Makefile @@ -0,0 +1,5 @@ +# $FreeBSD$ + +SUBDIR= cjktexsty fixrtf + +.include <bsd.subdir.mk> diff --git a/chinese/docproj/src/cjktexsty/Makefile b/chinese/docproj/src/cjktexsty/Makefile new file mode 100644 index 00000000000..3f00788744d --- /dev/null +++ b/chinese/docproj/src/cjktexsty/Makefile @@ -0,0 +1,13 @@ +# $FreeBSD$ + +PROG= cjktexsty +SRCS= cjktexsty.l + +PREFIX?= /usr/local +BINDIR= ${PREFIX}/bin +CFLAGS+=-I${PREFIX}/include +LDADD= -L${PREFIX}/lib -liconv +NO_MAN= +NOMAN= + +.include <bsd.prog.mk> diff --git a/chinese/docproj/src/cjktexsty/cjktexsty.l b/chinese/docproj/src/cjktexsty/cjktexsty.l new file mode 100644 index 00000000000..cb860a6609b --- /dev/null +++ b/chinese/docproj/src/cjktexsty/cjktexsty.l @@ -0,0 +1,349 @@ +%{ +/*- + * Copyright (c) 2005, 2006 intron <intron@intron.ac>. All rights reserved. + * Copyright (c) 2005, 2006 The FreeBSD Simplified Chinese Project. + * All rights reserved. + * + * This code is derived from software contributed to The FreeBSD Simplified + * Chinese Project by intron. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * CNPROJ: doc/zh_CN.GB2312/share/mk/cjktexsty.lex,v 1.1.1000.40 2006/02/19 20:32:32 intron Exp + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <err.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#include <iconv.h> + +char texencoding[128]="",*cjkencoding=NULL,cjkfont[128]=""; +iconv_t iconvhandle; +int ccmap_enable=0; + +void +errexit(void) +{ + + errx(1, "Error: line %d", yylineno); +} + +void +transcode(char *ch) +{ + char *pchar,*pout,input[16],output[128]; + const char *pin; + int c; + size_t lin,lout; + size_t outlen; + + pchar=strstr(ch,"{"); + if(pchar==NULL) + errexit(); + if(sscanf(pchar+1,"%d",&c)!=1) + errexit(); + + /* UCS-4 big endian, including not only Basic Multilingual Plane */ + input[0]=(c&0xff000000)>>24; + input[1]=(c&0xff0000)>>16; + input[2]=(c&0xff00)>>8; + input[3]=(c&0xff); + pin=input; + lin=4; + + pout=output; + lout=sizeof(output); + + iconv(iconvhandle,&pin,&lin,&pout,&lout); + + if(lin!=0) { + switch(c) { + case 8212: strcpy(output,"\\ensuremath{-}"); break; + case 8226: strcpy(output,"\\ensuremath{\\bullet}"); break; + case 8482: strcpy(output,"\\ensuremath{^{\\mathrm{TM}}}"); break; + case 10122: 
strcpy(output,"{\\large\\ding{202}}"); break; + case 10123: strcpy(output,"{\\large\\ding{203}}"); break; + case 10124: strcpy(output,"{\\large\\ding{204}}"); break; + case 10125: strcpy(output,"{\\large\\ding{205}}"); break; + case 10126: strcpy(output,"{\\large\\ding{206}}"); break; + case 10127: strcpy(output,"{\\large\\ding{207}}"); break; + case 10128: strcpy(output,"{\\large\\ding{208}}"); break; + case 10129: strcpy(output,"{\\large\\ding{209}}"); break; + case 10130: strcpy(output,"{\\large\\ding{210}}"); break; + case 10131: strcpy(output,"{\\large\\ding{211}}"); break; + case 10132: strcpy(output,"\\ensuremath{\\rightarrow}"); break; + case 65533: strcpy(output,"{\\large\\ding{96}}"); break; + default: + warnx("Unable to find a substitute for UNICODE character &#%d;", c); + strcpy(output,"??"); + break; + } + } else { + outlen=sizeof(output)-lout; + output[outlen]=0; + + if(outlen==2 && strcspn(output,"\\$&%#@{}^_~\x80")!=outlen) + { /* TeX special character */ + sprintf(output,"\\CJKchar{%u}{%u}", + (unsigned int)(unsigned char)output[0], + (unsigned int)(unsigned char)output[1] + ); + } + } + + printf("%s",output); +} + +%} + +%option yylineno +%option noyywrap + +fotbegin \\FOT\{[^}]*\} +fotend \\endFOT\{[^}]*\} +cjk \\Character\{[0-9]{1,5}\} + +%% + +{fotbegin} { + /* + * A confusing but practical structure: + * + * \usepackage{CJK} + * \begin{CJK*}{GB}{song} + * \FOT{3} + * + * ... + * + * \end{CJK*} + * \endFOT{} + * + * The macro call \begin{CJK*} must be put before + * \FOT, or generated PDF will include many "@". 
+ */ + printf("\\usepackage{textcomp}\n"); + printf("\\usepackage{pifont}\n"); + printf("\\usepackage{wasysym}\n"); + printf("\\usepackage{CJK}\n"); + if(ccmap_enable) printf("\\usepackage{ccmap}\n"); + printf("\\hypersetup{CJKbookmarks=true,hypertex,pdfauthor={FreeBSD Documentation Project}}\n"); + printf("\\begin{CJK*}{%s}{%s}\n%s\n",cjkencoding,cjkfont,yytext); + } +{fotend} { + /* + * \FOT does NOT include \begin{document}, + * while \endFOT includes \end{document} explicitly. + * Thus, \endFOT should NOT be put between + * \begin{CJK*} and \end{CJK*}, + * whether there is a \FOT between them or not. + */ + printf("\n\\end{CJK*}%s\n",yytext); + } +{cjk} { transcode(yytext); } + +[\xA0] { printf("{\\nobreakspace}"); } +[\xA1] { printf("{\\textexclamdown}"); } +[\xA2] { printf("{\\textcent}"); } +[\xA3] { printf("{\\pounds}"); } +[\xA4] { printf("{\\textcurrency}"); } +[\xA5] { printf("{\\textyen}"); } +[\xA6] { printf("{\\textbrokenbar}"); } +[\xA7] { printf("{\\S}"); } +[\xA8] { printf("{\\\"{}}"); } +[\xA9] { printf("{\\copyright}"); } +[\xAA] { printf("{\\textordfeminine}"); } +[\xAB] { printf("\\ensuremath{_{^{\\ll}}}"); } +[\xAC] { printf("\\ensuremath{\\lnot}"); } +[\xAD] { printf("{-}"); } +[\xAE] { printf("{\\textregistered}"); } +[\xAF] { printf("\\ensuremath{^{-}}"); } +[\xB0] { printf("{\\textdegree}"); } +[\xB1] { printf("\\ensuremath{\\pm}"); } +[\xB2] { printf("\\ensuremath{^{2}}"); } +[\xB3] { printf("\\ensuremath{^{3}}"); } +[\xB4] { printf("\\ensuremath{'}"); } +[\xB5] { printf("\\ensuremath{\\mu}"); } +[\xB6] { printf("{\\P}"); } +[\xB7] { printf("{\\ifmmode\\cdot\\else\\textperiodcentered\\fi}"); } +[\xB8] { printf("\\c{}"); } +[\xB9] { printf("\\ensuremath{^{1}}"); } +[\xBA] { printf("{\\textordmasculine}"); } +[\xBB] { printf("\\ensuremath{_{^{\\gg}}}"); } +[\xBC] { printf("{\\textonequarter}"); } +[\xBD] { printf("{\\textonehalf}"); } +[\xBE] { printf("{\\textthreequarters}"); } +[\xBF] { printf("{\\textquestiondown}"); } +[\xC0] { 
printf("\\ensuremath{\\grave{\\mathrm{A}}}"); } +[\xC1] { printf("\\ensuremath{\\acute{\\mathrm{A}}}"); } +[\xC2] { printf("{\\^A}"); } +[\xC3] { printf("{\\~A}"); } +[\xC4] { printf("{\\\"A}"); } +[\xC5] { printf("{\\AA}"); } +[\xC6] { printf("{\\AE}"); } +[\xC7] { printf("{\\c C}"); } +[\xC8] { printf("\\ensuremath{\\grave{\\mathrm{E}}}"); } +[\xC9] { printf("\\ensuremath{\\acute{\\mathrm{E}}}"); } +[\xCA] { printf("{\\^E}"); } +[\xCB] { printf("{\\\"E}"); } +[\xCC] { printf("\\ensuremath{\\grave{\\mathrm{I}}}"); } +[\xCD] { printf("\\ensuremath{\\acute{\\mathrm{I}}}"); } +[\xCE] { printf("{\\^I}"); } +[\xCF] { printf("{\\\"I}"); } +[\xD0] { printf("{\\DH}"); } +[\xD1] { printf("{\\~N}"); } +[\xD2] { printf("\\ensuremath{\\grave{\\mathrm{O}}}"); } +[\xD3] { printf("\\ensuremath{\\acute{\\mathrm{O}}}"); } +[\xD4] { printf("{\\^O}"); } +[\xD5] { printf("{\\~O}"); } +[\xD6] { printf("{\\\"O}"); } +[\xD7] { printf("\\ensuremath{\\times}"); } +[\xD8] { printf("{\\O}"); } +[\xD9] { printf("\\ensuremath{\\grave{\\mathrm{U}}}"); } +[\xDA] { printf("\\ensuremath{\\acute{\\mathrm{U}}}"); } +[\xDB] { printf("{\\^U}"); } +[\xDC] { printf("{\\\"U}"); } +[\xDD] { printf("\\ensuremath{\\acute{\\mathrm{Y}}}"); } +[\xDE] { printf("{\\Thorn}"); } +[\xDF] { printf("{\\ss}"); } +[\xE0] { printf("\\ensuremath{\\grave{\\mathrm{a}}}"); } +[\xE1] { printf("\\ensuremath{\\acute{\\mathrm{a}}}"); } +[\xE2] { printf("{\\^a}"); } +[\xE3] { printf("{\\~a}"); } +[\xE4] { printf("{\\\"a}"); } +[\xE5] { printf("{\\aa}"); } +[\xE6] { printf("{\\ae}"); } +[\xE7] { printf("{\\c c}"); } +[\xE8] { printf("\\ensuremath{\\grave{\\mathrm{e}}}"); } +[\xE9] { printf("\\ensuremath{\\acute{\\mathrm{e}}}"); } +[\xEA] { printf("{\\^e}"); } +[\xEB] { printf("{\\\"e}"); } +[\xEC] { printf("\\ensuremath{\\grave{\\mathrm{\\i}}}"); } +[\xED] { printf("\\ensuremath{\\acute{\\mathrm{\\i}}}"); } +[\xEE] { printf("{\\^\\i}"); } +[\xEF] { printf("{\\\"\\i}"); } +[\xF0] { printf("{\\dh}"); } +[\xF1] { printf("{\\~n}"); 
} +[\xF2] { printf("\\ensuremath{\\grave{\\mathrm{o}}}"); } +[\xF3] { printf("\\ensuremath{\\acute{\\mathrm{o}}}"); } +[\xF4] { printf("{\\^o}"); } +[\xF5] { printf("{\\~o}"); } +[\xF6] { printf("{\\\"o}"); } +[\xF7] { printf("\\ensuremath{\\div}"); } +[\xF8] { printf("{\\o}"); } +[\xF9] { printf("\\ensuremath{\\grave{\\mathrm{u}}}"); } +[\xFA] { printf("\\ensuremath{\\acute{\\mathrm{u}}}"); } +[\xFB] { printf("{\\^u}"); } +[\xFC] { printf("{\\\"u}"); } +[\xFD] { printf("\\ensuremath{\\acute{\\mathrm{y}}}"); } +[\xFE] { printf("{\\thorn}"); } +[\xFF] { printf("{\\\"y}"); } + +[\xa0-\xff] { + warnx("Unable to find a substitute for ISO8859-1 character \\x%X", + (unsigned int)(*((unsigned char *)yytext))); + printf("?"); + } + +%% + +void printusage() +{ + fprintf(stderr, "Usage: cjktexsty [ -c ] -e encoding -f fontname\n" + " Convert TeX source including \\Character{xxxxx} generated by\n" + " Jade/OpenJade into what CJK-LaTeX can process.\n" + " \n" + "NOTE: Jade/OpenJade supports EUC-JP natively. Thus, this tool SHOULD NOT be\n" + " used in this case. 
This tool treats all bytes larger than 0xa0 as\n" + " ISO 8859-1 characters, and converts \\Character{xxxxx} into encoding\n" + " that CJK-LaTeX can process.\n" + " \n" + "Options:\n" + " -c\n" + " Use ccmap.sty for PDFTeX to generate text-copyable CJK PDF.\n" + " The package ccmap.sty is written by Wenchang Sun and Linbo Zhang.\n" + " See also ftp://ftp.cc.ac.cn/pub/cct/ for details.\n" + " -e encoding\n" + " Specify TeX source encoding for CJK-LaTeX.\n" + " -f fontname\n" + " Specify font name in CJK macro call, such as\n" + " \\begin{CJK*}{encoding}{font}.\n" + " \n" + "CJK-LaTeX supported combinations by default:\n" + " <TeX source encoding> <CJK encoding name> <CJK font name>\n" + " ------------------------------------------------------------\n" + " GB2312 GB song\n" + " GBK GBK song\n" + " BIG5 Bg5 bsmi\n" + " EUCJP JIS min\n" + " EUCKR KS \n" + " UTF-8 UTF8 song\n" + ); +} + +int +main(int argc, char *argv[]) +{ + int ch; + + while ((ch = getopt(argc, argv, "ce:f:")) != -1) + { + switch (ch) + { + case 'c': + ccmap_enable=1; + break; + case 'e': + if(strcmp(optarg,"GB2312")==0) cjkencoding="GB"; + else if(strcmp(optarg,"GBK")==0) cjkencoding="GBK"; + else if(strcmp(optarg,"GB18030")==0) cjkencoding="GBK"; /* Not supported by CJK yet */ + else if(strcmp(optarg,"BIG5")==0) cjkencoding="Bg5"; + else if(strcmp(optarg,"EUCJP")==0) cjkencoding="JIS"; + else if(strcmp(optarg,"EUCKR")==0) cjkencoding="KS"; + else if(strcmp(optarg,"UTF-8")==0) cjkencoding="UTF8"; + else cjkencoding=NULL; + if(cjkencoding!=NULL) strlcpy(texencoding,optarg,sizeof(texencoding)); + break; + case 'f': + strlcpy(cjkfont,optarg,sizeof(cjkfont)); + break; + default: + printusage(); + return 1; + break; + } + } + + if(cjkencoding==NULL) + { + printusage(); + return 1; + } + + iconvhandle=iconv_open(texencoding,"UCS-4BE"); + yylex(); + iconv_close(iconvhandle); + return 0; +} diff --git a/chinese/docproj/src/fixrtf/Makefile b/chinese/docproj/src/fixrtf/Makefile new file mode 100644 index 
00000000000..9a7948fe651 --- /dev/null +++ b/chinese/docproj/src/fixrtf/Makefile @@ -0,0 +1,13 @@ +# $FreeBSD$ + +PROG= fixrtf +SRCS= fixrtf.l + +PREFIX?= /usr/local +BINDIR= ${PREFIX}/bin +CFLAGS+=-I${PREFIX}/include +LDADD= -L${PREFIX}/lib -lpng +NO_MAN= +NOMAN= + +.include <bsd.prog.mk> diff --git a/chinese/docproj/src/fixrtf/fixrtf.l b/chinese/docproj/src/fixrtf/fixrtf.l new file mode 100644 index 00000000000..25e22f2e51c --- /dev/null +++ b/chinese/docproj/src/fixrtf/fixrtf.l @@ -0,0 +1,527 @@ +%{ +/*- + * Copyright (c) 2005, 2006 intron <intron@intron.ac>. All rights reserved. + * Copyright (c) 2005, 2006 The FreeBSD Simplified Chinese Project. + * All rights reserved. + * + * This code is derived from software contributed to The FreeBSD Simplified + * Chinese Project by intron. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * From CNPROJ: doc/zh_CN.GB2312/share/mk/fixrtf.lex,v 1.1.1000.20 2006/02/19 10:21:40 intron Exp + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <err.h> +#include <assert.h> +#include <stdio.h> +#include <string.h> +#include <sys/param.h> +#include <stdlib.h> +#include <unistd.h> +#include <time.h> +#include <png.h> + +/* + * This program is used to fix RTF: + * 1. Embed PNGs into RTF. + * 2. Embed FreeBSD-specific information into RTF, such as organization name, + * building time. But unfortunately, so far only Microsoft Word can read + * them. In contrast, Microsoft Word Viewer and OpenOffice even cannot read + * this kind of information from RTF created by Microsoft Word and + * OpenOffice. (Option: -i) + * 3. Do some locale-specific fixing. (Option: -e <encoding>) + * + * See also Rich Text Format (RTF) Specification: + * 1. Version 1.8 (Microsoft Word 2003) + * http://www.microsoft.com/downloads/details.aspx?familyid=ac57de32-17f0-4b46-9e4e-467ef9bc5540&displaylang=en + * 2. Version 1.7 (Microsoft Word 2002) + * http://support.microsoft.com/kb/q86999/ + * 3. 
Version 1.6 (Microsoft Word 2000) + * http://msdn.microsoft.com/library/en-us/dnrtfspec/html/rtfspec.asp + */ + + +int embedpng_enable=0; + +/* See also http://msdn.microsoft.com/library/en-us/intl/unicode_81rn.asp */ +#define ENCODING_UNKNOWN 0 +#define ENCODING_GB2312 936 +#define ENCODING_GB18030 54936 +#define ENCODING_BIG5 950 + +int encoding=ENCODING_UNKNOWN; + + +int fetchinfo_enable=0; /* FALSE */ + + +#define MY_BUFFER_SIZE 3072 +#define MY_BUFFER_LIMIT 2048 + +/* MY_BUFFER_LIMIT is smaller MY_BUFFER_SIZE, reserving some redundance. */ + +/* + * "mybuffer" is used to cache RTF stream + * while fetching book/article information. + */ +size_t mybufferlength=0; +char mybuffer[MY_BUFFER_SIZE]; + + +#define INFO_TITLE 0 +#define INFO_AUTHOR 1 + +/* To store fetched book/article information */ +struct +{ + size_t length; + char text[MY_BUFFER_SIZE]; +} *pinfobuf=NULL,infobuf[]= +{ + {0,""}, + {0,""} +}; + +/* + * See also the section "Pictures" in RTF specification. + */ +void +embedpng(char *field) +{ + char *p1,*p2,fn[PATH_MAX]; + unsigned char buf[256]; + FILE *fp; + int l,i,nret; + png_structp png_ptr; + png_infop info_ptr,end_info; + png_uint_32 width,height; + + p1=strcasestr(field,"INCLUDEPICTURE"); + p1=strchr(p1+14,'"'); /* String after "INCLUDEPICTURE" */ + p2=strchr(p1+1,'"'); + l=p2-(p1+1); /* Substantial length of file name */ + if(l>sizeof(fn)-1) + { + warnx("*** Buffer Overflow Attack Detected !!! ***"); + exit(1); + } + memcpy(fn,p1+1,l); + fn[l]=0; + + if(l<4) /* It should be longer than ".png". */ + { + warnx("File name '%s' is too short!",fn); + goto embedpng_exit_1; + } + + if(strcasecmp(fn+(l-4),".png")!=0) + { + warnx("File name '%s' has not a suffix '.png'. 
Keep untouched.",fn); + goto embedpng_exit_1; + } + + if((fp=fopen(fn,"rb"))==NULL) + { + warnx("Failed to open '%s'!",fn); + goto embedpng_exit_1; + } + + fread(buf,1,8,fp); + if (png_sig_cmp(buf,0,8)) + { + warnx("The file '%s' is NOT in PNG format!",fn); + goto embedpng_exit_2; + } + png_ptr=png_create_read_struct(PNG_LIBPNG_VER_STRING,NULL,NULL,NULL); + if (!png_ptr) + { + warnx("Unable to create PNG read struct(*png_ptr)!"); + goto embedpng_exit_2; + } + info_ptr=png_create_info_struct(png_ptr); + if (!info_ptr) + { + warnx("Unable to create PNG info struct(*info_ptr)!"); + png_destroy_read_struct(&png_ptr,(png_infopp)NULL,(png_infopp)NULL); + goto embedpng_exit_2; + } + end_info=png_create_info_struct(png_ptr); + if(!end_info) + { + warnx("Unable to create PNG info struct(*end_info)!"); + png_destroy_read_struct(&png_ptr,&info_ptr,(png_infopp)NULL); + goto embedpng_exit_2; + } + if (setjmp(png_jmpbuf(png_ptr))) + { + warnx("LibPNG crashed!"); + png_destroy_read_struct(&png_ptr,&info_ptr,&end_info); + goto embedpng_exit_2; + } + rewind(fp); + png_init_io(png_ptr,fp); + png_read_info(png_ptr,info_ptr); + width=png_get_image_width(png_ptr,info_ptr); + height=png_get_image_height(png_ptr,info_ptr); + + if(width>1024 || height>768) warnx("Picture is too large!"); + + /* + * According to Microsoft's RTF specification, \picwN and \pichN is + * mandatory for \pict group. Actually, in both Microsoft Word Viewer + * and OpenOffice, these two control words take no effect for PNG. 
+ */ + printf("{\\pict\\pngblip\\picscalex100\\picscaley100\\picw%u\\pich%u", + (unsigned int)width,(unsigned int)height); + + rewind(fp); + while((nret=fread(buf,1,64,fp))>0) + { + printf("\n"); + for(i=0;i<nret;i++) + printf("%02x",(unsigned int)((unsigned char)buf[i])); + } + + printf("}"); + + warnx("'%s' (%ux%u) embedded.",fn,(unsigned int)width,(unsigned int)height); + + png_destroy_read_struct(&png_ptr,&info_ptr,&end_info); + fclose(fp); + goto embedpng_exit_0; + +embedpng_exit_2:; + fclose(fp); +embedpng_exit_1:; + printf("%s",field); /* Keep link in RTF untouched */ +embedpng_exit_0:; + return; +} + +/* + * See also the section "Font Table" in RTF specification. + */ +void +modifycharset(char *fcharset) +{ + char *s; + + switch(encoding) + { + case ENCODING_GB2312: + case ENCODING_GB18030: /* GB18030 is not supported in RTF so far */ + s="\\fcharset134"; + break; + case ENCODING_BIG5: + s="\\fcharset136"; + break; + default: + s="\\fcharset1"; /* "Default" */ + break; + } + + printf("%s",s); + + warnx("Charset control word modified: %s -> %s",fcharset,s); + + return; +} + +/* + * (init|addto|flush)mybuffer maintain buffer to cache RTF stream + * while fetching book/article information. 
+ */ +void initmybuffer() +{ + int i; + + mybufferlength=0; + for(i=0;i<sizeof(infobuf)/sizeof(infobuf[0]);i++) + { + infobuf[i].length=0; + infobuf[i].text[0]=0; + } +} + +int addtomybuffer(char *text, size_t leng) +{ + if(mybufferlength+leng>MY_BUFFER_LIMIT) return -1; + /* warnx("_%s_",yytext); */ + memcpy(mybuffer+mybufferlength,text,leng); + mybufferlength+=leng; /* No terminator '\0' */ + return 0; +} + +void flushmybuffer() +{ + fwrite(mybuffer,1,mybufferlength,yyout); + mybufferlength=0; +} + +#define ADDTOBUF { \ + if(addtomybuffer(yytext,yyleng)) \ + { \ + haltfetch(); \ + ECHO; \ + BEGIN(0); \ + warnx("Had been fetching book/article information until buffer was full!"); \ + YY_BREAK; \ + } \ + } + + +/* Collect book/article information RTF sequence */ +void collectinfo(char *text, size_t leng) +{ + assert(pinfobuf!=NULL); + if(pinfobuf->length+leng>=MY_BUFFER_LIMIT) /* Consider terminator '\0' */ + { + warnx("*** Too long text for title or author !!! ***"); + warnx("*** Buffer Overflow Attack To Be Considered !!! ***"); + return; /* Information item buffer is full. */ + } + memcpy(pinfobuf->text+pinfobuf->length,text,leng); + pinfobuf->length+=leng; + pinfobuf->text[pinfobuf->length]=0; +} + +/* Identify a RTF control word */ +int identifyctrlword(char *text, size_t leng, char *key) +{ + if(text[leng-1]==' ') + { /* Tailed by a space as delimiter */ + if(strlen(key)!=leng-1) return 0; + return !strncmp(text,key,leng-1); + } + + return !strcmp(text,key); +} + +/* + * Output fetch book/article information. + * See also the section "Information Group" in RTF specification. 
+ */ +void outputinfo() +{ + time_t t; + char buf[128]; + + printf("{\\info\\uc0"); + + printf("{\\title %s}{\\author %s}", + infobuf[INFO_TITLE].text,infobuf[INFO_AUTHOR].text); + + time(&t); + strftime(buf,sizeof(buf),"\\yr%Y\\mo%m\\dy%d\\hr%H\\min%M\\sec%S",localtime(&t)); + printf("{\\creatim%s}",buf); + + printf("}"); +} + +void haltfetch() +{ + warnx("Title: %s",infobuf[INFO_TITLE].text); + warnx("Author: %s",infobuf[INFO_AUTHOR].text); + outputinfo(); + flushmybuffer(); +} + +%} + +%option noyywrap + +%s fetchinfo + +pnglink \{\\field[^{}]*\{[^{}]*INCLUDEPICTURE[^{}]*\".+\"[^{}]*\}\{[^{}]*\}[^{}]*\} +sjischarset \\fcharset128 +stylesheet \{\\stylesheet[ ]? +titlebegin \\pard.{1,25}\\fs49[ ]? +authorbegin \\pard.{1,25}\\fs34[ ]? +rtfhexvalue \\\'[0-9A-Fa-f]{2} +rtfctrlword \\[a-z]+([-]?[0-9]+)?[ ]? +rtfctrlsymbol \\[^a-z] + +%% + +{pnglink} { /* + * Substitute RTF \pict group for RTF field group. + * An example generated by Jade/OpenJade: + * {\field\flddirty{\*\fldinst INCLUDEPICTURE "sockets/layers.png" }{\fldrslt }} + */ + if(embedpng_enable) embedpng(yytext); + else { ECHO; } + } + +{sjischarset} { + /* + * Jade/OpenJade mis-mark Chinese as Shift-JIS encoded Japanese. + * This may cause RTF viewer to display Chinese with Japanese font. + */ + if(encoding!=ENCODING_UNKNOWN) modifycharset(yytext); + else { ECHO; } + } + +{stylesheet} { /* Insert book/article information just before style sheet. */ + if(fetchinfo_enable) + { /* Begin fetching book/article information. */ + initmybuffer(); + BEGIN(fetchinfo); + fetchinfo_enable=0; /* FALSE, one-off */ + ADDTOBUF; + } + else + { + ECHO; + } + } + +<fetchinfo>{titlebegin} { /* Beginning of title, hacked by font size. */ + ADDTOBUF; + pinfobuf=&(infobuf[INFO_TITLE]); + if(pinfobuf->length>0) collectinfo(", ",2); /* Duplicated */ + } + +<fetchinfo>{authorbegin} { /* Beginning of author, hacked by font size. 
*/ + ADDTOBUF; + pinfobuf=&(infobuf[INFO_AUTHOR]); + if(pinfobuf->length>0) collectinfo(", ",2); /* Duplicated */ + } + +<fetchinfo>{rtfhexvalue} { /* A hexadecimal value, ignore. */ + ADDTOBUF; + } + +<fetchinfo>\\~ { /* Nonbreaking space, a control symbol, collect */ + ADDTOBUF; + if(pinfobuf!=NULL) collectinfo(" ",1); + } + +<fetchinfo>\\[-_] { /* Optional/nonbreaking hyphen, a control symbol, collect */ + ADDTOBUF; + if(pinfobuf!=NULL) collectinfo("-",1); + } + +<fetchinfo>{rtfctrlsymbol} { /* Other control symbols, ignore */ + ADDTOBUF; + } + +<fetchinfo>{rtfctrlword} { /* Control word */ + ADDTOBUF; + + if(identifyctrlword(yytext,yyleng,"\\keepn")) + { /* End of title or author, actually a hack */ + pinfobuf=NULL; + } + else if(yytext[0]=='\\' && yytext[1]=='u' && + ((yytext[2]>='0' && yytext[2]<='9') || yytext[2]=='-') ) + { /* Unicode Character, collect */ + if(pinfobuf!=NULL) + { + collectinfo(yytext,yyleng); + if(yytext[yyleng-1]!=' ') collectinfo(" ",1); + } + } + else if(identifyctrlword(yytext,yyleng,"\\page")) + { /* Accomplished !!! */ + haltfetch(); + BEGIN(0); + } + } + +<fetchinfo>[\n{}] { /* Ignore */ + ADDTOBUF; + } + +<fetchinfo>. { /* Collect */ + ADDTOBUF; + if(pinfobuf!=NULL) collectinfo(yytext,yyleng); + } + +%% + +void printusage() +{ + fprintf(stderr, "Usage: fixrtf [-e encoding] [-i] [-p] < inputfile > outputfile\n" + " Fix RTF file generated by Jade/OpenJade.\n" + "Options:\n" + " -e encoding\n" + " Specify encoding to do specific fixing. 
(GB2312|BIG5)\n" + " -i\n" + " Fill RTF file information, such as title and author,\n" + " hacked from RTF file generated by Jade/OpenJade.\n" + " -p\n" + " Embed linked PNG images into RTF file.\n" + ); +} + +int +main(int argc, char *argv[]) +{ + int ch; + + if(argc<=1) + { + warnx("You should indicate at least one kind of fixing."); + printusage(); + return 1; + } + + while ((ch = getopt(argc, argv, "e:ip")) != -1) + { + switch (ch) + { + case 'e': + if(strcasecmp(optarg,"GB2312")==0 || + strcasecmp(optarg,"GBK")==0) + { + encoding=ENCODING_GB2312; + } + else if(strcasecmp(optarg,"GB18030")==0) + { + encoding=ENCODING_GB18030; + } + else if(strcasecmp(optarg,"BIG5")==0) + { + encoding=ENCODING_BIG5; + } + break; + case 'i': + fetchinfo_enable=1; /* One-off */ + break; + case 'p': + embedpng_enable=1; + break; + default: + printusage(); + return 1; + break; + } + } + + yylex(); + + return 0; +} |