diff options
-rw-r--r-- | japanese/webalizer/Makefile | 2 | ||||
-rw-r--r-- | www/webalizer/Makefile | 15 | ||||
-rw-r--r-- | www/webalizer/files/ja-webalizer.conf-dist.patch | 67 | ||||
-rw-r--r-- | www/webalizer/files/webalizer-a-urasim_2.patch | 241 |
4 files changed, 325 insertions, 0 deletions
diff --git a/japanese/webalizer/Makefile b/japanese/webalizer/Makefile index 9a1847c55783..2695f3ddefec 100644 --- a/japanese/webalizer/Makefile +++ b/japanese/webalizer/Makefile @@ -12,6 +12,8 @@ MAINTAINER= dinoex@FreeBSD.org WEBALIZER_LANG= japanese MASTERDIR?= ${.CURDIR}/../../www/webalizer +OPTIONS= WEBALIZER_CONV "Use character code convert patch" off + .if exists(${.CURDIR}/Makefile.local) .include "${.CURDIR}/Makefile.local" .endif diff --git a/www/webalizer/Makefile b/www/webalizer/Makefile index c57036b902df..c95c307cfbe8 100644 --- a/www/webalizer/Makefile +++ b/www/webalizer/Makefile @@ -57,6 +57,21 @@ SUPP_LANG= catalan chinese croatian czech danish dutch english \ .if defined(WEBALIZER_LANG) CONFIGURE_ARGS+= --with-language=${WEBALIZER_LANG} +# The patch file is written by URASHIMA Akira +# see http://tyche.pu-toyama.ac.jp/~a-urasim/webalizer/ +.if ${WEBALIZER_LANG} == japanese +EXTRA_PATCHES+= ${FILESDIR}/ja-webalizer.conf-dist.patch +.endif +.endif + +.if defined(WITH_WEBALIZER_CONV) +USE_ICONV=yes +# The patch file is written by URASHIMA Akira +# see http://tyche.pu-toyama.ac.jp/~a-urasim/webalizer/ +EXTRA_PATCHES+= ${FILESDIR}/webalizer-a-urasim_2.patch +CONFIGURE_ARGS+= --enable-mininls +CONFIGURE_ENV+= LIBS="-L${LOCALBASE}/lib -liconv" +CFLAGS+= -I${PREFIX}/include .endif pre-configure: diff --git a/www/webalizer/files/ja-webalizer.conf-dist.patch b/www/webalizer/files/ja-webalizer.conf-dist.patch new file mode 100644 index 000000000000..1124060a7d6c --- /dev/null +++ b/www/webalizer/files/ja-webalizer.conf-dist.patch @@ -0,0 +1,67 @@ +--- sample.conf.orig Fri Sep 29 12:51:42 2000 ++++ sample.conf Thu Oct 14 11:48:21 2004 +@@ -107,9 +107,12 @@ + + PageType htm* + PageType cgi ++#PageType shtml + #PageType phtml + #PageType php3 ++#PageType php + #PageType pl ++#PageType rb + + # UseHTTPS should be used if the analysis is being run on a + # secure server, and links to urls should use 'https://' instead +@@ -153,6 +156,7 @@ + # is 80 characters, so use multiple lines if needed. + + #HTMLHead <META NAME="author" CONTENT="The Webalizer"> ++HTMLHead <META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=x-euc-jp"> + + # HTMLBody defined the HTML code to be inserted, starting with the + # <BODY> tag. If not specified, the default is shown below. If +@@ -393,6 +397,9 @@ + HideURL *.png + HideURL *.PNG + HideURL *.ra ++HideURL *.css ++HideURL *.CSS ++HideURL *.ico + + # Hiding agents is kind of futile + #HideAgent RealPlayer +@@ -412,6 +419,11 @@ + #GroupReferrer excite.com/ Excite + #GroupReferrer infoseek.com/ InfoSeek + #GroupReferrer webcrawler.com/ WebCrawler ++#GroupReferrer yahoo.co.jp/ Yahoo!Japan ++#GroupReferrer google.co.jp/ GoogleJapan ++#GroupReferrer infoseek.co.jp/ InfoSeekJapan ++#GroupReferrer goo.ne.jp/ Goo ++#GroupReferrer msn.co.jp/ MSNJapan + + #GroupUser root Admin users + #GroupUser admin Admin users +@@ -530,6 +542,21 @@ + SearchEngine mamma.com query= + SearchEngine alltheweb.com query= + SearchEngine northernlight.com qr= ++ ++SearchEngine yahoo.co.jp p= ++SearchEngine google.co.jp q= ++SearchEngine infoseek.co.jp qt= ++SearchEngine msn.co.jp q= ++# ocn ++SearchEngine goo.ne.jp MT= ++SearchEngine biglobe.ne.jp q= ++SearchEngine nifty.com Text= ++# so-net odn ++SearchEngine excite.co.jp search= ++SearchEngine livedoor.com q= ++SearchEngine jp.aol.com query= ++#SearchEngine .google. q= ++#SearchEngine bulkfeeds.net q= + + # The Dump* keywords allow the dumping of Sites, URL's, Referrers + # User Agents, Usernames and Search strings to seperate tab delimited diff --git a/www/webalizer/files/webalizer-a-urasim_2.patch b/www/webalizer/files/webalizer-a-urasim_2.patch new file mode 100644 index 000000000000..9195ab73c0b9 --- /dev/null +++ b/www/webalizer/files/webalizer-a-urasim_2.patch @@ -0,0 +1,241 @@ +--- webalizer.c.a-urasim Wed Apr 17 07:11:31 2002 ++++ webalizer.c Tue Dec 23 23:26:23 2003 +@@ -39,6 +39,7 @@ + #include <sys/utsname.h> + #include <sys/times.h> + #include <zlib.h> ++#include <iconv.h> + + /* ensure getopt */ + #ifdef HAVE_GETOPT_H +@@ -224,6 +225,8 @@ + char *f_cp=f_buf+GZ_BUFSIZE; /* pointer into the buffer */ + int f_end; /* count to end of buffer */ + ++iconv_t cd_from_sjis, cd_from_utf8; ++ + /*********************************************/ + /* MAIN - start here */ + /*********************************************/ +@@ -526,6 +529,9 @@ + + start_time = times(&mytms); + ++ cd_from_sjis = iconv_open("EUC-JP", "Shift_JIS"); ++ cd_from_utf8 = iconv_open("EUC-JP", "UTF-8"); ++ + /*********************************************/ + /* MAIN PROCESS LOOP - read through log file */ + /*********************************************/ +@@ -1345,6 +1351,9 @@ + if (dns_db) close_cache(); + #endif + ++ iconv_close(cd_from_sjis); ++ iconv_close(cd_from_utf8); ++ + /* Whew, all done! Exit with completion status (0) */ + exit(0); + } +@@ -1773,6 +1782,23 @@ + + if (!str) return NULL; /* make sure strings valid */ + ++ while(*cp1){ /* for apache log's escape code. */ ++ if(*cp1 == '\\' && *(cp1+1) == 'x' && ++ isxdigit(*(cp1+2)) && isxdigit(*(cp1+3))){ ++ *cp2 = from_hex(*(cp1+2))*16 + from_hex(*(cp1+3)); ++ if ((*cp2<32)||(*cp2==127)) *cp2='_'; ++ cp1+=4; cp2++; ++ ++ } ++ else if(*cp1 == '\\' && *(cp1+1) == '\\'){ ++ *cp2++='\\'; ++ cp1+=2; ++ } ++ else *cp2++ = *cp1++; ++ } ++ *cp2=*cp1; ++ ++ cp1=cp2=str; + while (*cp1) + { + if (*cp1=='%') /* Found an escape? */ +@@ -1783,7 +1809,7 @@ + if (*cp1) *cp2=from_hex(*cp1++)*16; /* convert hex to an ascii */ + if (*cp1) *cp2+=from_hex(*cp1); /* (hopefully) character */ + if ((*cp2<32)||(*cp2==127)) *cp2='_'; /* make '_' if its bad */ +- if (*cp1) cp2++; cp1++; ++ if (*cp1){ cp2++; cp1++;} /* bug? */ + } + else *cp2++='%'; + } +@@ -1793,6 +1819,116 @@ + return str; /* return the string */ + } + ++int score_eucj(unsigned char *str) ++{ ++ int stat=0; ++ int score=0; ++ int bad=0; ++ if(str==NULL) return -1; ++ ++ for(; *str!=0;str++){ ++ switch(stat){ ++ case 0: ++ if(*str>= 0x20 && *str <= 0x7e) score++; //ASCII ++ else if(*str >= 0xa1 && *str <= 0xfe) stat=1; //KANJI(1) ++ else if(*str == 0x8f); // HOJYO KANJI ++ else if(*str == 0x8e) stat=2; // KANA ++ else if(*str < 0x20); //CTRL ++ else bad=1; ++ break; ++ case 1: ++ if(*str >= 0xa1 && *str <= 0xfe) score += 2; //KANJI(2) ++ else bad=1; ++ stat=0; ++ break; ++ case 2: ++ if(*str >= 0xa1 && *str <= 0xdf); //hankaku <- 0 ++ else bad=1; ++ stat=0; ++ break; ++ } ++ } ++ if(bad != 0) score = -1; ++ return score; ++} ++ ++int score_sjis(unsigned char *str) ++{ ++ int stat=0; ++ int score=0; ++ int bad=0; ++ if(str==NULL) return -1; ++ ++ for(; *str != 0; str++){ ++ switch(stat){ ++ case 0: ++ if(*str>= 0x20 && *str <= 0x7e) score++;//ASCII ++ else if((*str >= 0x81 && *str <= 0x9f) || ++ (*str >= 0xe0 && *str <= 0xfc)) stat=1; //SJIS(1) ++ else if(*str >= 0xa1 && *str <= 0xdf); // KANA ++ else if(*str < 0x20); // CTRL ++ else bad=1; ++ break; ++ case 1: ++ if((*str >= 0x40 && *str <= 0x7e) || ++ (*str >= 0x80 && *str <= 0xfc)) score += 2; //SJIS(2) ++ else bad=1; ++ stat=0; ++ break; ++ } ++ } ++ if(bad != 0) score = -1; ++ return score; ++} ++ ++int score_utf8(unsigned char *str) ++{ ++ int stat=0; ++ int score=0; ++ int bad=0; ++ if(str==NULL) return -1; ++ ++ for(; *str != 0; str++){ ++ switch(stat){ ++ case 0: ++ if(*str>= 0x20 && *str <= 0x7e) score++; //ASCII ++ else if(*str >= 0xc0 && *str <= 0xdf) stat=1; //greek etc. ++ else if(*str >= 0xe0 && *str <= 0xef) stat=2; //KANJI etc. ++ else if(*str >= 0xf0 && *str <= 0xf7) stat=4; ++ else if(*str < 0x20); //CTRL ++ else bad=1; ++ break; ++ case 1: ++ if(*str >= 0x80 && *str <= 0xbf) score++; ++ else bad=1; ++ stat=0; ++ break; ++ case 2: ++ if(*str >= 0x80 && *str <= 0xbf) stat=3; //KANJI(2) ++ else {bad=1; stat=0;} ++ break; ++ case 3: ++ if(*str >= 0x80 && *str <= 0xbf) score+=3; //KANJI(3) ++ else bad=1; ++ stat=0; ++ break; ++ case 4: ++ case 5: ++ if(*str >= 0x80 && *str <= 0xbf) stat++; ++ else {bad=1; stat=0;} ++ break; ++ case 6: ++ if(*str >= 0x80 && *str <= 0xbf) score+=4; ++ else bad=1; ++ stat=0; ++ break; ++ } ++ } ++ if(bad != 0) score = -1; ++ return score; ++} ++ ++ + /*********************************************/ + /* SRCH_STRING - get search strings from ref */ + /*********************************************/ +@@ -1804,6 +1940,10 @@ + char srch[80]=""; + unsigned char *cp1, *cp2, *cps; + int sp_flg=0; ++ int sjis, eucj, utf8; ++ char tmpbuf2[BUFSIZE]; ++ size_t inlen, outlen; ++ unsigned char *cp3; + + /* Check if search engine referrer or return */ + if ( (cps=isinglist(search_list,log_rec.refer))==NULL) return; +@@ -1839,9 +1978,39 @@ + cp1=cp2+strlen(cp2)-1; + while (cp1!=cp2) if (isspace(*cp1)) *cp1--='\0'; else break; + ++ utf8=score_utf8(cp2); ++ sjis=score_sjis(cp2); ++ eucj=score_eucj(cp2); ++ if(utf8 >= sjis && utf8 >= eucj){ ++ iconv(cd_from_utf8, NULL, 0, NULL, 0); ++ cp3 = cp2; ++ inlen = strlen(cp2)+1; ++ cp1 = tmpbuf2; ++ outlen = sizeof(tmpbuf2); ++ if(iconv(cd_from_utf8, (char **)&cp3, &inlen, (char**)&cp1, &outlen) >= 0 && ++ inlen == 0){ ++ cp2 = tmpbuf2; ++ } ++ } ++ else if(sjis > utf8 && sjis > eucj){ ++ iconv(cd_from_sjis, NULL, 0, NULL, 0); ++ cp3 = cp2; ++ inlen = strlen(cp2)+1; ++ cp1 = tmpbuf2; ++ outlen = sizeof(tmpbuf2); ++ if(iconv(cd_from_sjis, (char **)&cp3, &inlen, (char**)&cp1, &outlen) >= 0 && ++ inlen == 0){ ++ cp2 = tmpbuf2; ++ } ++ } ++ + /* strip invalid chars */ + cp1=cp2; +- while (*cp1!=0) { if ((*cp1<32)||(*cp1==127)) *cp1='_'; cp1++; } ++ while (*cp1!=0) { ++ if ((*cp1<32)||(*cp1==127)) *cp1='_'; ++ *cp1=tolower(*cp1); ++ cp1++; ++ } + + if (put_snode(cp2,(u_long)1,sr_htab)) + { |