diff options
author | nork <nork@FreeBSD.org> | 2008-12-31 16:08:32 +0800 |
---|---|---|
committer | nork <nork@FreeBSD.org> | 2008-12-31 16:08:32 +0800 |
commit | 030352939a26fd20559165d5726aefa1105bef70 (patch) | |
tree | 9bc46dc1ac2e0a3a5617f1a87bcf326297252b72 /japanese/awffull | |
parent | 6eabbb214c8fa7485ecf69754c4b5363826ccb57 (diff) | |
download | freebsd-ports-gnome-030352939a26fd20559165d5726aefa1105bef70.tar.gz freebsd-ports-gnome-030352939a26fd20559165d5726aefa1105bef70.tar.zst freebsd-ports-gnome-030352939a26fd20559165d5726aefa1105bef70.zip |
Add ja-awffull port, which is supported for Japanese search words (EUC-JP/Shift_JIS/
UTF-8), and is a slave port for www/awffull.
Submitted by: nyan
Reviewed by: nyan
Diffstat (limited to 'japanese/awffull')
-rw-r--r-- | japanese/awffull/Makefile | 16 | ||||
-rw-r--r-- | japanese/awffull/files/awffull-ja.diff | 267 |
2 files changed, 283 insertions, 0 deletions
diff --git a/japanese/awffull/Makefile b/japanese/awffull/Makefile new file mode 100644 index 000000000000..ca36574af878 --- /dev/null +++ b/japanese/awffull/Makefile @@ -0,0 +1,16 @@ +# New ports collection makefile for: ja-awffull +# Date created: 2008-12-31 +# Whom: Norikatsu Shigemura <nork@FreeBSD.org> +# +# $FreeBSD$ +# + +CATEGORIES= japanese www + +MAINTAINER= nork@FreeBSD.org + +MASTERDIR= ${.CURDIR}/../../www/awffull + +EXTRA_PATCHES= ${.CURDIR}/files/awffull-ja.diff + +.include "${MASTERDIR}/Makefile" diff --git a/japanese/awffull/files/awffull-ja.diff b/japanese/awffull/files/awffull-ja.diff new file mode 100644 index 000000000000..ff6d63c9c200 --- /dev/null +++ b/japanese/awffull/files/awffull-ja.diff @@ -0,0 +1,267 @@ +--- src/awffull.c.orig 2008-12-13 11:28:35.000000000 +0900 ++++ src/awffull.c 2008-12-31 16:43:45.000000000 +0900 +@@ -37,6 +37,9 @@ + /* STANDARD INCLUDES */ + /*********************************************/ + #include "awffull.h" /* main header */ ++#ifdef HAVE_ICONV ++#include <iconv.h> ++#endif + + /* internal function prototypes */ + +@@ -137,6 +140,10 @@ static char const ab_month_name[][4] = { + "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" + }; + ++#ifdef HAVE_ICONV ++iconv_t cd_from_sjis, cd_from_eucj; ++#endif ++ + /*********************************************/ + /* MAIN - start here */ + /*********************************************/ +@@ -339,6 +346,11 @@ main(int argc, char *argv[]) + + start_time = times(&mytms); + ++#ifdef HAVE_ICONV ++ cd_from_sjis = iconv_open("UTF-8", "Shift_JIS"); ++ cd_from_eucj = iconv_open("UTF-8", "EUC-JP"); ++#endif ++ + /********************************************* + * MAIN PROCESS LOOP - read through log file * + *********************************************/ +@@ -801,9 +813,17 @@ main(int argc, char *argv[]) + } + + del_htabs(); ++#ifdef HAVE_ICONV ++ iconv_close(cd_from_sjis); ++ iconv_close(cd_from_eucj); ++#endif + /* Whew, all done! Exit with completion status (0) */ + exit(0); + } else { ++#ifdef HAVE_ICONV ++ iconv_close(cd_from_sjis); ++ iconv_close(cd_from_eucj); ++#endif + /* No valid records found... exit with error (1) */ + VPRINT(VERBOSE1, "%s\n", _("No valid records found!")); + exit(1); +@@ -1740,6 +1760,26 @@ unescape(char *str) + if (!str) + return NULL; /* make sure strings valid */ + ++ /* for apache log's escape code. */ ++ while (*cp1) { ++ if (*cp1 == '\\' && *(cp1 + 1) == 'x' && ++ isxdigit(*(cp1 + 2)) && isxdigit(*(cp1 + 3))) { ++ *cp2 = from_hex(*(cp1 + 2)) * 16 + from_hex(*(cp1 + 3)); ++ if ((*cp2 < 32) || (*cp2 == 127)) ++ *cp2 = '_'; ++ cp1 += 4; ++ cp2++; ++ } else if (*cp1 == '\\' && *(cp1 + 1) == '\\') { ++ *cp2 = '\\'; ++ cp1 += 2; ++ cp2++; ++ } else { ++ *cp2++ = *cp1++; ++ } ++ } ++ *cp2 = *cp1; ++ cp1 = cp2 = str; ++ + while (*cp1) { + if (*cp1 == '%') { /* Found an escape? */ + cp1++; +@@ -1762,16 +1802,139 @@ unescape(char *str) + return str; /* return the string */ + } + ++#ifdef HAVE_ICONV ++ ++/*********************************************/ ++/* SCORE_XXX - calculate score */ ++/*********************************************/ ++ ++int score_eucj(unsigned char *str) ++{ ++ int stat=0; ++ int score=0; ++ int bad=0; ++ if(str==NULL) return -1; ++ ++ for(; *str!=0;str++){ ++ switch(stat){ ++ case 0: ++ if(*str>= 0x20 && *str <= 0x7e) score++; //ASCII ++ else if(*str >= 0xa1 && *str <= 0xfe) stat=1; //KANJI(1) ++ else if(*str == 0x8f); // HOJYO KANJI ++ else if(*str == 0x8e) stat=2; // KANA ++ else if(*str < 0x20); //CTRL ++ else bad=1; ++ break; ++ case 1: ++ if(*str >= 0xa1 && *str <= 0xfe) score += 2; //KANJI(2) ++ else bad=1; ++ stat=0; ++ break; ++ case 2: ++ if(*str >= 0xa1 && *str <= 0xdf); //hankaku <- 0 ++ else bad=1; ++ stat=0; ++ break; ++ } ++ } ++ if(bad != 0) score = -1; ++ return score; ++} ++ ++int score_sjis(unsigned char *str) ++{ ++ int stat=0; ++ int score=0; ++ int bad=0; ++ if(str==NULL) return -1; ++ ++ for(; *str != 0; str++){ ++ switch(stat){ ++ case 0: ++ if(*str>= 0x20 && *str <= 0x7e) score++;//ASCII ++ else if((*str >= 0x81 && *str <= 0x9f) || ++ (*str >= 0xe0 && *str <= 0xfc)) stat=1; //SJIS(1) ++ else if(*str >= 0xa1 && *str <= 0xdf); // KANA ++ else if(*str < 0x20); // CTRL ++ else bad=1; ++ break; ++ case 1: ++ if((*str >= 0x40 && *str <= 0x7e) || ++ (*str >= 0x80 && *str <= 0xfc)) score += 2; //SJIS(2) ++ else bad=1; ++ stat=0; ++ break; ++ } ++ } ++ if(bad != 0) score = -1; ++ return score; ++} ++ ++int score_utf8(unsigned char *str) ++{ ++ int stat=0; ++ int score=0; ++ int bad=0; ++ if(str==NULL) return -1; ++ ++ for(; *str != 0; str++){ ++ switch(stat){ ++ case 0: ++ if(*str>= 0x20 && *str <= 0x7e) score++; //ASCII ++ else if(*str >= 0xc0 && *str <= 0xdf) stat=1; //greek etc. ++ else if(*str >= 0xe0 && *str <= 0xef) stat=2; //KANJI etc. ++ else if(*str >= 0xf0 && *str <= 0xf7) stat=4; ++ else if(*str < 0x20); //CTRL ++ else bad=1; ++ break; ++ case 1: ++ if(*str >= 0x80 && *str <= 0xbf) score++; ++ else bad=1; ++ stat=0; ++ break; ++ case 2: ++ if(*str >= 0x80 && *str <= 0xbf) stat=3; //KANJI(2) ++ else {bad=1; stat=0;} ++ break; ++ case 3: ++ if(*str >= 0x80 && *str <= 0xbf) score+=3; //KANJI(3) ++ else bad=1; ++ stat=0; ++ break; ++ case 4: ++ case 5: ++ if(*str >= 0x80 && *str <= 0xbf) stat++; ++ else {bad=1; stat=0;} ++ break; ++ case 6: ++ if(*str >= 0x80 && *str <= 0xbf) score+=4; ++ else bad=1; ++ stat=0; ++ break; ++ } ++ } ++ if(bad != 0) score = -1; ++ return score; ++} ++ ++#endif ++ + /*********************************************/ + /* SRCH_STRING - get search strings from ref */ + /*********************************************/ + void + srch_string(char *refer, char *ptr) + { +- char tmpbuf[BUFSIZE]; +- char srch[80] = ""; +- char *cp1, *cp2, *cps; ++ unsigned char tmpbuf[BUFSIZE]; ++ unsigned char srch[80] = ""; ++ unsigned char *cp1, *cp2, *cps; + int sp_flg = 0; ++#ifdef HAVE_ICONV ++ int sjis, eucj, utf8; ++ unsigned char tmpbuf2[BUFSIZE]; ++ unsigned char *cp3; ++ size_t inlen, outlen; ++#endif + + /* Check if search engine referrer or return */ + if ((cps = isinlist(search_list, refer)) == NULL) +@@ -1832,6 +1995,35 @@ srch_string(char *refer, char *ptr) + else + break; + ++#ifdef HAVE_ICONV ++ utf8 = score_utf8(cp2); ++ sjis = score_sjis(cp2); ++ eucj = score_eucj(cp2); ++ if (sjis > utf8 && sjis > eucj) { ++ iconv(cd_from_sjis, NULL, 0, NULL, 0); ++ cp3 = cp2; ++ inlen = strlen(cp2) + 1; ++ cp1 = tmpbuf2; ++ outlen = sizeof(tmpbuf2); ++ if (iconv(cd_from_sjis, ++ (const char **)&cp3, &inlen, (char**)&cp1, &outlen) >= 0 && ++ inlen == 0) { ++ cp2 = tmpbuf2; ++ } ++ } else if (eucj > utf8 && eucj > sjis) { ++ iconv(cd_from_eucj, NULL, 0, NULL, 0); ++ cp3 = cp2; ++ inlen = strlen(cp2) + 1; ++ cp1 = tmpbuf2; ++ outlen = sizeof(tmpbuf2); ++ if (iconv(cd_from_eucj, ++ (const char **)&cp3, &inlen, (char**)&cp1, &outlen) >= 0 && ++ inlen == 0) { ++ cp2 = tmpbuf2; ++ } ++ } ++#endif ++ + /* strip invalid chars */ + cp1 = cp2; + while (*cp1 != '\0') { +@@ -2391,6 +2583,7 @@ cleanup_refer(char *refer, char *srchstr + + /* unescape referrer */ + unescape(refer); ++ unescape(refer); /* XXX */ + + /* fix referrer field */ + cp1 = refer; |