aboutsummaryrefslogtreecommitdiffstats
path: root/japanese/awffull
diff options
context:
space:
mode:
authornork <nork@FreeBSD.org>2008-12-31 16:08:32 +0800
committernork <nork@FreeBSD.org>2008-12-31 16:08:32 +0800
commit030352939a26fd20559165d5726aefa1105bef70 (patch)
tree9bc46dc1ac2e0a3a5617f1a87bcf326297252b72 /japanese/awffull
parent6eabbb214c8fa7485ecf69754c4b5363826ccb57 (diff)
downloadfreebsd-ports-gnome-030352939a26fd20559165d5726aefa1105bef70.tar.gz
freebsd-ports-gnome-030352939a26fd20559165d5726aefa1105bef70.tar.zst
freebsd-ports-gnome-030352939a26fd20559165d5726aefa1105bef70.zip
Add ja-awffull port, which is supported for Japanese search words (EUC-JP/Shift_JIS/
UTF-8), and is a slave port for www/awffull. Submitted by: nyan Reviewed by: nyan
Diffstat (limited to 'japanese/awffull')
-rw-r--r--japanese/awffull/Makefile16
-rw-r--r--japanese/awffull/files/awffull-ja.diff267
2 files changed, 283 insertions, 0 deletions
diff --git a/japanese/awffull/Makefile b/japanese/awffull/Makefile
new file mode 100644
index 000000000000..ca36574af878
--- /dev/null
+++ b/japanese/awffull/Makefile
@@ -0,0 +1,16 @@
+# New ports collection makefile for: ja-awffull
+# Date created: 2008-12-31
+# Whom: Norikatsu Shigemura <nork@FreeBSD.org>
+#
+# $FreeBSD$
+#
+
+CATEGORIES= japanese www
+
+MAINTAINER= nork@FreeBSD.org
+
+MASTERDIR= ${.CURDIR}/../../www/awffull
+
+EXTRA_PATCHES= ${.CURDIR}/files/awffull-ja.diff
+
+.include "${MASTERDIR}/Makefile"
diff --git a/japanese/awffull/files/awffull-ja.diff b/japanese/awffull/files/awffull-ja.diff
new file mode 100644
index 000000000000..ff6d63c9c200
--- /dev/null
+++ b/japanese/awffull/files/awffull-ja.diff
@@ -0,0 +1,267 @@
+--- src/awffull.c.orig 2008-12-13 11:28:35.000000000 +0900
++++ src/awffull.c 2008-12-31 16:43:45.000000000 +0900
+@@ -37,6 +37,9 @@
+ /* STANDARD INCLUDES */
+ /*********************************************/
+ #include "awffull.h" /* main header */
++#ifdef HAVE_ICONV
++#include <iconv.h>
++#endif
+
+ /* internal function prototypes */
+
+@@ -137,6 +140,10 @@ static char const ab_month_name[][4] = {
+ "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
+ };
+
++#ifdef HAVE_ICONV
++iconv_t cd_from_sjis, cd_from_eucj;
++#endif
++
+ /*********************************************/
+ /* MAIN - start here */
+ /*********************************************/
+@@ -339,6 +346,11 @@ main(int argc, char *argv[])
+
+ start_time = times(&mytms);
+
++#ifdef HAVE_ICONV
++ cd_from_sjis = iconv_open("UTF-8", "Shift_JIS");
++ cd_from_eucj = iconv_open("UTF-8", "EUC-JP");
++#endif
++
+ /*********************************************
+ * MAIN PROCESS LOOP - read through log file *
+ *********************************************/
+@@ -801,9 +813,17 @@ main(int argc, char *argv[])
+ }
+
+ del_htabs();
++#ifdef HAVE_ICONV
++ iconv_close(cd_from_sjis);
++ iconv_close(cd_from_eucj);
++#endif
+ /* Whew, all done! Exit with completion status (0) */
+ exit(0);
+ } else {
++#ifdef HAVE_ICONV
++ iconv_close(cd_from_sjis);
++ iconv_close(cd_from_eucj);
++#endif
+ /* No valid records found... exit with error (1) */
+ VPRINT(VERBOSE1, "%s\n", _("No valid records found!"));
+ exit(1);
+@@ -1740,6 +1760,26 @@ unescape(char *str)
+ if (!str)
+ return NULL; /* make sure strings valid */
+
++ /* for apache log's escape code. */
++ while (*cp1) {
++ if (*cp1 == '\\' && *(cp1 + 1) == 'x' &&
++ isxdigit(*(cp1 + 2)) && isxdigit(*(cp1 + 3))) {
++ *cp2 = from_hex(*(cp1 + 2)) * 16 + from_hex(*(cp1 + 3));
++ if ((*cp2 < 32) || (*cp2 == 127))
++ *cp2 = '_';
++ cp1 += 4;
++ cp2++;
++ } else if (*cp1 == '\\' && *(cp1 + 1) == '\\') {
++ *cp2 = '\\';
++ cp1 += 2;
++ cp2++;
++ } else {
++ *cp2++ = *cp1++;
++ }
++ }
++ *cp2 = *cp1;
++ cp1 = cp2 = str;
++
+ while (*cp1) {
+ if (*cp1 == '%') { /* Found an escape? */
+ cp1++;
+@@ -1762,16 +1802,139 @@ unescape(char *str)
+ return str; /* return the string */
+ }
+
++#ifdef HAVE_ICONV
++
++/*********************************************/
++/* SCORE_XXX - calculate score */
++/*********************************************/
++
++int score_eucj(unsigned char *str)
++{
++ int stat=0;
++ int score=0;
++ int bad=0;
++ if(str==NULL) return -1;
++
++ for(; *str!=0;str++){
++ switch(stat){
++ case 0:
++ if(*str>= 0x20 && *str <= 0x7e) score++; //ASCII
++ else if(*str >= 0xa1 && *str <= 0xfe) stat=1; //KANJI(1)
++ else if(*str == 0x8f); // HOJYO KANJI
++ else if(*str == 0x8e) stat=2; // KANA
++ else if(*str < 0x20); //CTRL
++ else bad=1;
++ break;
++ case 1:
++ if(*str >= 0xa1 && *str <= 0xfe) score += 2; //KANJI(2)
++ else bad=1;
++ stat=0;
++ break;
++ case 2:
++ if(*str >= 0xa1 && *str <= 0xdf); //hankaku <- 0
++ else bad=1;
++ stat=0;
++ break;
++ }
++ }
++ if(bad != 0) score = -1;
++ return score;
++}
++
++int score_sjis(unsigned char *str)
++{
++ int stat=0;
++ int score=0;
++ int bad=0;
++ if(str==NULL) return -1;
++
++ for(; *str != 0; str++){
++ switch(stat){
++ case 0:
++ if(*str>= 0x20 && *str <= 0x7e) score++;//ASCII
++ else if((*str >= 0x81 && *str <= 0x9f) ||
++ (*str >= 0xe0 && *str <= 0xfc)) stat=1; //SJIS(1)
++ else if(*str >= 0xa1 && *str <= 0xdf); // KANA
++ else if(*str < 0x20); // CTRL
++ else bad=1;
++ break;
++ case 1:
++ if((*str >= 0x40 && *str <= 0x7e) ||
++ (*str >= 0x80 && *str <= 0xfc)) score += 2; //SJIS(2)
++ else bad=1;
++ stat=0;
++ break;
++ }
++ }
++ if(bad != 0) score = -1;
++ return score;
++}
++
++int score_utf8(unsigned char *str)
++{
++ int stat=0;
++ int score=0;
++ int bad=0;
++ if(str==NULL) return -1;
++
++ for(; *str != 0; str++){
++ switch(stat){
++ case 0:
++ if(*str>= 0x20 && *str <= 0x7e) score++; //ASCII
++ else if(*str >= 0xc0 && *str <= 0xdf) stat=1; //greek etc.
++ else if(*str >= 0xe0 && *str <= 0xef) stat=2; //KANJI etc.
++ else if(*str >= 0xf0 && *str <= 0xf7) stat=4;
++ else if(*str < 0x20); //CTRL
++ else bad=1;
++ break;
++ case 1:
++ if(*str >= 0x80 && *str <= 0xbf) score++;
++ else bad=1;
++ stat=0;
++ break;
++ case 2:
++ if(*str >= 0x80 && *str <= 0xbf) stat=3; //KANJI(2)
++ else {bad=1; stat=0;}
++ break;
++ case 3:
++ if(*str >= 0x80 && *str <= 0xbf) score+=3; //KANJI(3)
++ else bad=1;
++ stat=0;
++ break;
++ case 4:
++ case 5:
++ if(*str >= 0x80 && *str <= 0xbf) stat++;
++ else {bad=1; stat=0;}
++ break;
++ case 6:
++ if(*str >= 0x80 && *str <= 0xbf) score+=4;
++ else bad=1;
++ stat=0;
++ break;
++ }
++ }
++ if(bad != 0) score = -1;
++ return score;
++}
++
++#endif
++
+ /*********************************************/
+ /* SRCH_STRING - get search strings from ref */
+ /*********************************************/
+ void
+ srch_string(char *refer, char *ptr)
+ {
+- char tmpbuf[BUFSIZE];
+- char srch[80] = "";
+- char *cp1, *cp2, *cps;
++ unsigned char tmpbuf[BUFSIZE];
++ unsigned char srch[80] = "";
++ unsigned char *cp1, *cp2, *cps;
+ int sp_flg = 0;
++#ifdef HAVE_ICONV
++ int sjis, eucj, utf8;
++ unsigned char tmpbuf2[BUFSIZE];
++ unsigned char *cp3;
++ size_t inlen, outlen;
++#endif
+
+ /* Check if search engine referrer or return */
+ if ((cps = isinlist(search_list, refer)) == NULL)
+@@ -1832,6 +1995,35 @@ srch_string(char *refer, char *ptr)
+ else
+ break;
+
++#ifdef HAVE_ICONV
++ utf8 = score_utf8(cp2);
++ sjis = score_sjis(cp2);
++ eucj = score_eucj(cp2);
++ if (sjis > utf8 && sjis > eucj) {
++ iconv(cd_from_sjis, NULL, 0, NULL, 0);
++ cp3 = cp2;
++ inlen = strlen(cp2) + 1;
++ cp1 = tmpbuf2;
++ outlen = sizeof(tmpbuf2);
++ if (iconv(cd_from_sjis,
++ (const char **)&cp3, &inlen, (char**)&cp1, &outlen) >= 0 &&
++ inlen == 0) {
++ cp2 = tmpbuf2;
++ }
++ } else if (eucj > utf8 && eucj > sjis) {
++ iconv(cd_from_eucj, NULL, 0, NULL, 0);
++ cp3 = cp2;
++ inlen = strlen(cp2) + 1;
++ cp1 = tmpbuf2;
++ outlen = sizeof(tmpbuf2);
++ if (iconv(cd_from_eucj,
++ (const char **)&cp3, &inlen, (char**)&cp1, &outlen) >= 0 &&
++ inlen == 0) {
++ cp2 = tmpbuf2;
++ }
++ }
++#endif
++
+ /* strip invalid chars */
+ cp1 = cp2;
+ while (*cp1 != '\0') {
+@@ -2391,6 +2583,7 @@ cleanup_refer(char *refer, char *srchstr
+
+ /* unescape referrer */
+ unescape(refer);
++ unescape(refer); /* XXX */
+
+ /* fix referrer field */
+ cp1 = refer;