aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--japanese/webalizer/Makefile2
-rw-r--r--www/webalizer/Makefile15
-rw-r--r--www/webalizer/files/ja-webalizer.conf-dist.patch67
-rw-r--r--www/webalizer/files/webalizer-a-urasim_2.patch241
4 files changed, 325 insertions, 0 deletions
diff --git a/japanese/webalizer/Makefile b/japanese/webalizer/Makefile
index 9a1847c55783..2695f3ddefec 100644
--- a/japanese/webalizer/Makefile
+++ b/japanese/webalizer/Makefile
@@ -12,6 +12,8 @@ MAINTAINER= dinoex@FreeBSD.org
WEBALIZER_LANG= japanese
MASTERDIR?= ${.CURDIR}/../../www/webalizer
+OPTIONS= WEBALIZER_CONV "Use character code convert patch" off
+
.if exists(${.CURDIR}/Makefile.local)
.include "${.CURDIR}/Makefile.local"
.endif
diff --git a/www/webalizer/Makefile b/www/webalizer/Makefile
index c57036b902df..c95c307cfbe8 100644
--- a/www/webalizer/Makefile
+++ b/www/webalizer/Makefile
@@ -57,6 +57,21 @@ SUPP_LANG= catalan chinese croatian czech danish dutch english \
.if defined(WEBALIZER_LANG)
CONFIGURE_ARGS+= --with-language=${WEBALIZER_LANG}
+# The patch file is written by URASHIMA Akira
+# see http://tyche.pu-toyama.ac.jp/~a-urasim/webalizer/
+.if ${WEBALIZER_LANG} == japanese
+EXTRA_PATCHES+= ${FILESDIR}/ja-webalizer.conf-dist.patch
+.endif
+.endif
+
+.if defined(WITH_WEBALIZER_CONV)
+USE_ICONV=yes
+# Conversion patch by URASHIMA Akira; libiconv (library and headers) is taken from LOCALBASE,
+# see http://tyche.pu-toyama.ac.jp/~a-urasim/webalizer/
+EXTRA_PATCHES+= ${FILESDIR}/webalizer-a-urasim_2.patch
+CONFIGURE_ARGS+= --enable-mininls
+CONFIGURE_ENV+= LIBS="-L${LOCALBASE}/lib -liconv"
+CFLAGS+= -I${LOCALBASE}/include
.endif
pre-configure:
diff --git a/www/webalizer/files/ja-webalizer.conf-dist.patch b/www/webalizer/files/ja-webalizer.conf-dist.patch
new file mode 100644
index 000000000000..1124060a7d6c
--- /dev/null
+++ b/www/webalizer/files/ja-webalizer.conf-dist.patch
@@ -0,0 +1,67 @@
+--- sample.conf.orig Fri Sep 29 12:51:42 2000
++++ sample.conf Thu Oct 14 11:48:21 2004
+@@ -107,9 +107,12 @@
+
+ PageType htm*
+ PageType cgi
++#PageType shtml
+ #PageType phtml
+ #PageType php3
++#PageType php
+ #PageType pl
++#PageType rb
+
+ # UseHTTPS should be used if the analysis is being run on a
+ # secure server, and links to urls should use 'https://' instead
+@@ -153,6 +156,7 @@
+ # is 80 characters, so use multiple lines if needed.
+
+ #HTMLHead <META NAME="author" CONTENT="The Webalizer">
++HTMLHead <META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=x-euc-jp">
+
+ # HTMLBody defined the HTML code to be inserted, starting with the
+ # <BODY> tag. If not specified, the default is shown below. If
+@@ -393,6 +397,9 @@
+ HideURL *.png
+ HideURL *.PNG
+ HideURL *.ra
++HideURL *.css
++HideURL *.CSS
++HideURL *.ico
+
+ # Hiding agents is kind of futile
+ #HideAgent RealPlayer
+@@ -412,6 +419,11 @@
+ #GroupReferrer excite.com/ Excite
+ #GroupReferrer infoseek.com/ InfoSeek
+ #GroupReferrer webcrawler.com/ WebCrawler
++#GroupReferrer yahoo.co.jp/ Yahoo!Japan
++#GroupReferrer google.co.jp/ GoogleJapan
++#GroupReferrer infoseek.co.jp/ InfoSeekJapan
++#GroupReferrer goo.ne.jp/ Goo
++#GroupReferrer msn.co.jp/ MSNJapan
+
+ #GroupUser root Admin users
+ #GroupUser admin Admin users
+@@ -530,6 +542,21 @@
+ SearchEngine mamma.com query=
+ SearchEngine alltheweb.com query=
+ SearchEngine northernlight.com qr=
++
++SearchEngine yahoo.co.jp p=
++SearchEngine google.co.jp q=
++SearchEngine infoseek.co.jp qt=
++SearchEngine msn.co.jp q=
++# ocn
++SearchEngine goo.ne.jp MT=
++SearchEngine biglobe.ne.jp q=
++SearchEngine nifty.com Text=
++# so-net odn
++SearchEngine excite.co.jp search=
++SearchEngine livedoor.com q=
++SearchEngine jp.aol.com query=
++#SearchEngine .google. q=
++#SearchEngine bulkfeeds.net q=
+
+ # The Dump* keywords allow the dumping of Sites, URL's, Referrers
+ # User Agents, Usernames and Search strings to seperate tab delimited
diff --git a/www/webalizer/files/webalizer-a-urasim_2.patch b/www/webalizer/files/webalizer-a-urasim_2.patch
new file mode 100644
index 000000000000..9195ab73c0b9
--- /dev/null
+++ b/www/webalizer/files/webalizer-a-urasim_2.patch
@@ -0,0 +1,241 @@
+--- webalizer.c.a-urasim Wed Apr 17 07:11:31 2002
++++ webalizer.c Tue Dec 23 23:26:23 2003
+@@ -39,6 +39,7 @@
+ #include <sys/utsname.h>
+ #include <sys/times.h>
+ #include <zlib.h>
++#include <iconv.h>
+
+ /* ensure getopt */
+ #ifdef HAVE_GETOPT_H
+@@ -224,6 +225,8 @@
+ char *f_cp=f_buf+GZ_BUFSIZE; /* pointer into the buffer */
+ int f_end; /* count to end of buffer */
+
++iconv_t cd_from_sjis, cd_from_utf8;
++
+ /*********************************************/
+ /* MAIN - start here */
+ /*********************************************/
+@@ -526,6 +529,9 @@
+
+ start_time = times(&mytms);
+
++ cd_from_sjis = iconv_open("EUC-JP", "Shift_JIS");
++ cd_from_utf8 = iconv_open("EUC-JP", "UTF-8");
++
+ /*********************************************/
+ /* MAIN PROCESS LOOP - read through log file */
+ /*********************************************/
+@@ -1345,6 +1351,9 @@
+ if (dns_db) close_cache();
+ #endif
+
++ iconv_close(cd_from_sjis);
++ iconv_close(cd_from_utf8);
++
+ /* Whew, all done! Exit with completion status (0) */
+ exit(0);
+ }
+@@ -1773,6 +1782,23 @@
+
+ if (!str) return NULL; /* make sure strings valid */
+
++ while(*cp1){ /* for apache log's escape code. */
++ if(*cp1 == '\\' && *(cp1+1) == 'x' &&
++ isxdigit(*(cp1+2)) && isxdigit(*(cp1+3))){
++ *cp2 = from_hex(*(cp1+2))*16 + from_hex(*(cp1+3));
++ if ((*cp2<32)||(*cp2==127)) *cp2='_';
++ cp1+=4; cp2++;
++
++ }
++ else if(*cp1 == '\\' && *(cp1+1) == '\\'){
++ *cp2++='\\';
++ cp1+=2;
++ }
++ else *cp2++ = *cp1++;
++ }
++ *cp2=*cp1;
++
++ cp1=cp2=str;
+ while (*cp1)
+ {
+ if (*cp1=='%') /* Found an escape? */
+@@ -1783,7 +1809,7 @@
+ if (*cp1) *cp2=from_hex(*cp1++)*16; /* convert hex to an ascii */
+ if (*cp1) *cp2+=from_hex(*cp1); /* (hopefully) character */
+ if ((*cp2<32)||(*cp2==127)) *cp2='_'; /* make '_' if its bad */
+- if (*cp1) cp2++; cp1++;
++ if (*cp1){ cp2++; cp1++;} /* bug? */
+ }
+ else *cp2++='%';
+ }
+@@ -1793,6 +1819,116 @@
+ return str; /* return the string */
+ }
+
++int score_eucj(unsigned char *str) /* rate how well a NUL-terminated byte string fits the EUC-JP byte layout; -1 = no fit */
++{
++ int stat=0; // 0 = expecting a new character, 1 = inside a two-byte character, 2 = after the 0x8e prefix
++ int score=0; // grows with every byte sequence that fits
++ int bad=0; // set once any byte does not fit; never cleared
++ if(str==NULL) return -1; // no string: report "no fit"
++
++ for(; *str!=0;str++){
++ switch(stat){
++ case 0:
++ if(*str>= 0x20 && *str <= 0x7e) score++; // printable ASCII: one point
++ else if(*str >= 0xa1 && *str <= 0xfe) stat=1; // first byte of a two-byte kanji; second byte checked in state 1
++ else if(*str == 0x8f); // prefix byte for supplementary kanji (orig. note: HOJYO KANJI): accepted, no score, no state change
++ else if(*str == 0x8e) stat=2; // prefix byte for kana; following byte checked in state 2
++ else if(*str < 0x20); // control character: accepted, no score
++ else bad=1; // any other byte value does not fit
++ break;
++ case 1:
++ if(*str >= 0xa1 && *str <= 0xfe) score += 2; // valid second kanji byte: two points for the pair
++ else bad=1;
++ stat=0;
++ break;
++ case 2:
++ if(*str >= 0xa1 && *str <= 0xdf); // half-width kana byte: accepted but adds nothing to the score
++ else bad=1;
++ stat=0;
++ break;
++ }
++ }
++ if(bad != 0) score = -1; // NOTE(review): stat is not examined here, so a string ending right after a lead/prefix byte is not marked bad
++ return score;
++}
++
++int score_sjis(unsigned char *str) /* rate how well a NUL-terminated byte string fits the Shift_JIS byte layout; -1 = no fit */
++{
++ int stat=0; // 0 = expecting a new character, 1 = expecting the second byte of a two-byte character
++ int score=0; // grows with every byte sequence that fits
++ int bad=0; // set once any byte does not fit; never cleared
++ if(str==NULL) return -1; // no string: report "no fit"
++
++ for(; *str != 0; str++){
++ switch(stat){
++ case 0:
++ if(*str>= 0x20 && *str <= 0x7e) score++;// printable ASCII: one point
++ else if((*str >= 0x81 && *str <= 0x9f) ||
++ (*str >= 0xe0 && *str <= 0xfc)) stat=1; // first byte of a two-byte character; second byte checked in state 1
++ else if(*str >= 0xa1 && *str <= 0xdf); // single-byte kana: accepted, no score
++ else if(*str < 0x20); // control character: accepted, no score
++ else bad=1; // any other byte value does not fit
++ break;
++ case 1:
++ if((*str >= 0x40 && *str <= 0x7e) ||
++ (*str >= 0x80 && *str <= 0xfc)) score += 2; // valid second byte: two points for the pair
++ else bad=1;
++ stat=0;
++ break;
++ }
++ }
++ if(bad != 0) score = -1; // NOTE(review): stat is not examined here, so a string ending right after a first byte is not marked bad
++ return score;
++}
++
++int score_utf8(unsigned char *str) /* rate how well a NUL-terminated byte string fits the UTF-8 byte layout; -1 = no fit */
++{
++ int stat=0; // 0 = expecting a new character; 1 = last byte of a 2-byte form; 2-3 = 3-byte form; 4-6 = 4-byte form
++ int score=0; // grows with every byte sequence that fits
++ int bad=0; // set once any byte does not fit; never cleared
++ if(str==NULL) return -1; // no string: report "no fit"
++
++ for(; *str != 0; str++){
++ switch(stat){
++ case 0:
++ if(*str>= 0x20 && *str <= 0x7e) score++; // printable ASCII: one point
++ else if(*str >= 0xc0 && *str <= 0xdf) stat=1; // lead byte of a 2-byte sequence (Greek etc.)
++ else if(*str >= 0xe0 && *str <= 0xef) stat=2; // lead byte of a 3-byte sequence (kanji etc.)
++ else if(*str >= 0xf0 && *str <= 0xf7) stat=4; // lead byte of a 4-byte sequence
++ else if(*str < 0x20); // control character: accepted, no score
++ else bad=1; // any other byte value (including a stray 0x80-0xbf) does not fit
++ break;
++ case 1: // second and final byte of a 2-byte sequence
++ if(*str >= 0x80 && *str <= 0xbf) score++; // one point for the whole 2-byte sequence
++ else bad=1;
++ stat=0;
++ break;
++ case 2: // second byte of a 3-byte sequence
++ if(*str >= 0x80 && *str <= 0xbf) stat=3; // continuation byte ok; wait for the third byte
++ else {bad=1; stat=0;}
++ break;
++ case 3: // third and final byte of a 3-byte sequence
++ if(*str >= 0x80 && *str <= 0xbf) score+=3; // three points for the whole 3-byte sequence
++ else bad=1;
++ stat=0;
++ break;
++ case 4: // second byte of a 4-byte sequence
++ case 5: // third byte of a 4-byte sequence
++ if(*str >= 0x80 && *str <= 0xbf) stat++; // continuation byte ok; advance to the next position
++ else {bad=1; stat=0;}
++ break;
++ case 6: // fourth and final byte of a 4-byte sequence
++ if(*str >= 0x80 && *str <= 0xbf) score+=4; // four points for the whole 4-byte sequence
++ else bad=1;
++ stat=0;
++ break;
++ }
++ }
++ if(bad != 0) score = -1; // NOTE(review): stat is not examined here, so a string ending in the middle of a sequence is not marked bad
++ return score;
++}
++
++
+ /*********************************************/
+ /* SRCH_STRING - get search strings from ref */
+ /*********************************************/
+@@ -1804,6 +1940,10 @@
+ char srch[80]="";
+ unsigned char *cp1, *cp2, *cps;
+ int sp_flg=0;
++ int sjis, eucj, utf8;
++ char tmpbuf2[BUFSIZE];
++ size_t inlen, outlen;
++ unsigned char *cp3;
+
+ /* Check if search engine referrer or return */
+ if ( (cps=isinglist(search_list,log_rec.refer))==NULL) return;
+@@ -1839,9 +1978,39 @@
+ cp1=cp2+strlen(cp2)-1;
+ while (cp1!=cp2) if (isspace(*cp1)) *cp1--='\0'; else break;
+
++ utf8=score_utf8(cp2);
++ sjis=score_sjis(cp2);
++ eucj=score_eucj(cp2);
++ if(utf8 >= sjis && utf8 >= eucj){
++ iconv(cd_from_utf8, NULL, 0, NULL, 0);
++ cp3 = cp2;
++ inlen = strlen(cp2)+1;
++ cp1 = tmpbuf2;
++ outlen = sizeof(tmpbuf2);
++ if(iconv(cd_from_utf8, (char **)&cp3, &inlen, (char**)&cp1, &outlen) >= 0 &&
++ inlen == 0){
++ cp2 = tmpbuf2;
++ }
++ }
++ else if(sjis > utf8 && sjis > eucj){
++ iconv(cd_from_sjis, NULL, 0, NULL, 0);
++ cp3 = cp2;
++ inlen = strlen(cp2)+1;
++ cp1 = tmpbuf2;
++ outlen = sizeof(tmpbuf2);
++ if(iconv(cd_from_sjis, (char **)&cp3, &inlen, (char**)&cp1, &outlen) >= 0 &&
++ inlen == 0){
++ cp2 = tmpbuf2;
++ }
++ }
++
+ /* strip invalid chars */
+ cp1=cp2;
+- while (*cp1!=0) { if ((*cp1<32)||(*cp1==127)) *cp1='_'; cp1++; }
++ while (*cp1!=0) {
++ if ((*cp1<32)||(*cp1==127)) *cp1='_';
++ *cp1=tolower(*cp1);
++ cp1++;
++ }
+
+ if (put_snode(cp2,(u_long)1,sr_htab))
+ {