Add koi8-r support to jade to solve problem with koi8-r encoding

in printed formats. SP_ENCODING=koi8-r should be passed to jade. Obtained from: ftp://ftp.ptc.spbu.ru/people/uwe/sgml/ (slightly modified) Found by: Denis Philippov Submitted by: den
author: kuriyama <kuriyama@FreeBSD.org> 2004-08-04 07:25:17 +0800
committer: kuriyama <kuriyama@FreeBSD.org> 2004-08-04 07:25:17 +0800
commit: dc76a42c492e7be9d38b59fee98cec45c3507e12 (patch)
tree: b4a7b1415e0e29fddae9f7329e7a06d058ddaa42 /textproc
parent: d6b310e8e39aee9725910036554d2e88b42e63fe (diff)
download: freebsd-ports-graphics-dc76a42c492e7be9d38b59fee98cec45c3507e12.tar.gz
freebsd-ports-graphics-dc76a42c492e7be9d38b59fee98cec45c3507e12.tar.zst
freebsd-ports-graphics-dc76a42c492e7be9d38b59fee98cec45c3507e12.zip
7 files changed, 351 insertions, 1 deletions
diff --git a/textproc/jade/Makefile b/textproc/jade/Makefile
index 861fdf38855..18154a15598 100644
--- a/textproc/jade/Makefile
+++ b/textproc/jade/Makefile
@@ -7,7 +7,7 @@
 
 PORTNAME=	jade
 PORTVERSION=	1.2.1
-PORTREVISION=	7
+PORTREVISION=	8
 CATEGORIES=	textproc
 MASTER_SITES=	ftp://ftp.jclark.com/pub/jade/
 
diff --git a/textproc/jade/files/patch-CharsetRegistry.cxx b/textproc/jade/files/patch-CharsetRegistry.cxx
new file mode 100644
index 00000000000..940ba42bfb3
--- /dev/null
+++ b/textproc/jade/files/patch-CharsetRegistry.cxx
@@ -0,0 +1,20 @@
+--- lib/CharsetRegistry.cxx.orig	Sat Jul 31 17:03:07 2004
++++ lib/CharsetRegistry.cxx	Sat Jul 31 17:05:46 2004
+@@ -164,6 +164,9 @@
+ static const unsigned short iso8859_9[] = {
+ #include "iso8859-9.h"
+ };
++static const unsigned short koi8_r[] = {
++#include "koi8-r.h"
++};
+ static const unsigned short iso646_jis_G0[] = {
+ #include "iso646-jis.h"
+ };
+@@ -203,6 +206,7 @@
+   { CharsetRegistry::ISO8859_7, iso8859_7 },
+   { CharsetRegistry::ISO8859_8, iso8859_8 },
+   { CharsetRegistry::ISO8859_9, iso8859_9 },
++  { CharsetRegistry::KOI8_R, koi8_r },
+   { CharsetRegistry::ISO646_JIS_G0, iso646_jis_G0 },
+   { CharsetRegistry::JIS0201, jis0201 },
+ #ifdef SP_MULTI_BYTE
diff --git a/textproc/jade/files/patch-CharsetRegistry.h b/textproc/jade/files/patch-CharsetRegistry.h
new file mode 100644
index 00000000000..fb06fe3e7ee
--- /dev/null
+++ b/textproc/jade/files/patch-CharsetRegistry.h
@@ -0,0 +1,10 @@
+--- include/CharsetRegistry.h.orig	Sat Jul 31 17:02:57 2004
++++ include/CharsetRegistry.h	Sat Jul 31 17:05:32 2004
+@@ -45,6 +45,7 @@
+     GB2312 = 58,
+     ISO10646_UCS2 = 176,
+     ISO10646_UCS4 = 177,
++    KOI8_R = 65534, // not registered
+     BIG5 = 65535 // not registered
+   };
+   static ISORegistrationNumber getRegistrationNumber(const StringC &desig,
diff --git a/textproc/jade/files/patch-CodingSystemKit.cxx b/textproc/jade/files/patch-CodingSystemKit.cxx
new file mode 100644
index 00000000000..9a88b5eba36
--- /dev/null
+++ b/textproc/jade/files/patch-CodingSystemKit.cxx
@@ -0,0 +1,62 @@
+--- lib/CodingSystemKit.cxx.orig	Sat Jul 31 17:03:29 2004
++++ lib/CodingSystemKit.cxx	Sat Jul 31 17:08:59 2004
+@@ -74,7 +74,8 @@
+     iso8859_6,
+     iso8859_7,
+     iso8859_8,
+-    iso8859_9
++    iso8859_9,
++    koi8_r
+   };
+   struct Entry {
+     const char *name;
+@@ -111,6 +112,7 @@
+   TranslateCodingSystem iso8859_7CodingSystem_;
+   TranslateCodingSystem iso8859_8CodingSystem_;
+   TranslateCodingSystem iso8859_9CodingSystem_;
++  TranslateCodingSystem koi8_rCodingSystem_;
+ #ifdef WIN32
+   Win32CodingSystem ansiCodingSystem_;
+   Win32CodingSystem oemCodingSystem_;
+@@ -246,6 +248,15 @@
+   { CharsetRegistry::UNREGISTERED, 0x0 }
+ };
+ 
++static const TranslateCodingSystem::Desc koi8_rDesc[] = {
++  { CharsetRegistry::ISO646_C0, 0x0 },
++  { CharsetRegistry::ISO646_ASCII_G0, 0x0 },
++  // FIXME: only GR part of KOI8-R is handled (i.e. 160..255)
++  //        since koi8-r does not follow ISO control/graphic model
++  { CharsetRegistry::KOI8_R, 0x80 },
++  { CharsetRegistry::UNREGISTERED, 0x0 }
++};
++
+ #endif /* SP_MULTI_BYTE */
+ 
+ const CodingSystemKitImpl::Entry CodingSystemKitImpl::bctfTable_[] = {
+@@ -292,6 +303,8 @@
+   { "ISO-8859-8", iso8859_8 },
+   { "IS8859-9", iso8859_9 },
+   { "ISO-8859-9", iso8859_9 },
++  { "KOI8-R", koi8_r }, // RFC 1489
++  { "KOI8", koi8_r },
+   { "EUC-JP", eucjp },
+   { "EUC-CN", euccn },
+   { "GB2312", euccn },
+@@ -324,6 +337,7 @@
+   iso8859_7CodingSystem_(&identityCodingSystem_, iso8859_7Desc, &systemCharset_, 0x100, unicodeReplaceChar),
+   iso8859_8CodingSystem_(&identityCodingSystem_, iso8859_8Desc, &systemCharset_, 0x100, unicodeReplaceChar),
+   iso8859_9CodingSystem_(&identityCodingSystem_, iso8859_9Desc, &systemCharset_, 0x100, unicodeReplaceChar),
++  koi8_rCodingSystem_(&identityCodingSystem_, koi8_rDesc, &systemCharset_, 0x100, unicodeReplaceChar),
+   eucjpCodingSystem_(&eucBctf_, jis2Desc, &systemCharset_, 0x8000, unicodeReplaceChar),
+   euccnCodingSystem_(&eucBctf_, gbDesc, &systemCharset_, 0x8000, unicodeReplaceChar),
+   euckrCodingSystem_(&eucBctf_, kscDesc, &systemCharset_, 0x8000, unicodeReplaceChar),
+@@ -472,6 +486,8 @@
+     return &iso8859_8CodingSystem_;
+   case iso8859_9:
+     return &iso8859_9CodingSystem_;
++  case koi8_r:
++    return &koi8_rCodingSystem_;
+   case xml:
+     return &xmlCodingSystem_;
+ #ifdef WIN32
diff --git a/textproc/jade/files/patch-koi8-r.h b/textproc/jade/files/patch-koi8-r.h
new file mode 100644
index 00000000000..96c76a800d2
--- /dev/null
+++ b/textproc/jade/files/patch-koi8-r.h
@@ -0,0 +1,18 @@
+--- /dev/null	Sat Jul 31 17:25:08 2004
++++ include/koi8-r.h	Sat Jul 31 17:04:24 2004
+@@ -0,0 +1,15 @@
++/* KOI8-R (GR only) */
++96, 0x0020,
++0x2550, 0x2551, 0x2552, 0x0451, 0x2553, 0x2554, 0x2555, 0x2556,
++0x2557, 0x2558, 0x2559, 0x255a, 0x255b, 0x255c, 0x255d, 0x255e,
++0x255f, 0x2560, 0x2561, 0x0401, 0x2562, 0x2563, 0x2564, 0x2565,
++0x2566, 0x2567, 0x2568, 0x2569, 0x256a, 0x256b, 0x256c, 0x00a9,
++0x044e, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433,
++0x0445, 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e,
++0x043f, 0x044f, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432,
++0x044c, 0x044b, 0x0437, 0x0448, 0x044d, 0x0449, 0x0447, 0x044a,
++0x042e, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413,
++0x0425, 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e,
++0x041f, 0x042f, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412,
++0x042c, 0x042b, 0x0417, 0x0428, 0x042d, 0x0429, 0x0427, 0x042a,
++0
diff --git a/textproc/jade/files/patch-koi8r.dcl b/textproc/jade/files/patch-koi8r.dcl
new file mode 100644
index 00000000000..ee85848ee56
--- /dev/null
+++ b/textproc/jade/files/patch-koi8r.dcl
@@ -0,0 +1,239 @@
+--- /dev/null	Sat Jul 31 17:25:08 2004
++++ pubtext/koi8r.dcl	Sat Jul 31 17:04:35 2004
+@@ -0,0 +1,236 @@
++		     <!SGML "ISO 8879:1986 (ENR)"
++
++-- SGML Declaration that declares document charset to be the alphabet   --
++-- part of the koi8-r encoding.                                         --
++
++-- Derived from default declaration for James Clark's SP SGML parser.   --
++-- Only parts related to character sets was modified.  Tailor the rest  --
++-- of the defaults to suit your needs.                                  --
++
++-- Thanks to OmniMark for their excellent paper "Understanding The      --
++-- SGML Declaration" (http://www.omnimark.com/resources/white/dec/)     --
++-- [Copyright OmniMark Technologies Corporation, 1997. All rights       --
++-- reserved].  Few key paragraphs from this paper are quoted below to   --
++-- give you some idea of what is going on.                              --
++
++-- Many, many thanks to James Clark for his patience and kind help.     --
++
++
++
++-- The meanings of characters in an SGML document are defined using     --
++-- three types of character sets:                                       --
++
++-- 1. The "syntax-reference character set" is used to assign meanings   --
++--    to character numbers.                                             --
++
++-- 2. A correspondence is defined between characters in the             --
++--    syntax-reference character set and characters in a "base          --
++--    character set", which transfers the meanings of syntax-reference  --
++--    characters to corresponding base characters.                      --
++
++-- 3. A correspondence is then defined between characters in one or     --
++--    more base character sets and the "document character set".  One   --
++--    of the base character sets must have been defined by being        --
++--    associated with a syntax-reference character set in a previous    --
++--    step.                                                             --
++
++CHARSET
++
++-- The document character set is what defines what characters can be    --
++-- used in the markup and text of a document and defines what those     --
++-- characters mean.                                                     --
++
++        -- The document character set part of the SGML Declaration      --
++        -- selects one or more base character sets ...                  --
++
++        BASESET  "ISO Registration Number 176//CHARSET
++                  ISO/IEC 10646-1:1993 UCS-2
++                  with implementation level 3//ESC 2/5 2/15 4/5"
++
++
++        -- ... and defines the correspondences between characters in    --
++        -- the base character sets and the document character set.      --
++
++        DESCSET    0  9 UNUSED
++                   9  2      9
++                  11  2 UNUSED
++                  13  1     13
++                  14 18 UNUSED
++                  32 95     32
++                 127 36 UNUSED
++                 163  1   1105 -- CYRILLIC SMALL LETTER IO --
++                 164 15 UNUSED
++                 179  1   1025 -- CYRILLIC CAPITAL LETTER IO --
++                 180 11 UNUSED
++                 191  1    169 -- COPYRIGHT SIGN --
++                 192  1   1102 -- CYRILLIC SMALL LETTER YU --
++                 193  1   1072 -- CYRILLIC SMALL LETTER A --
++                 194  1   1073 -- CYRILLIC SMALL LETTER BE --
++                 195  1   1094 -- CYRILLIC SMALL LETTER TSE --
++                 196  1   1076 -- CYRILLIC SMALL LETTER DE --
++                 197  1   1077 -- CYRILLIC SMALL LETTER IE --
++                 198  1   1092 -- CYRILLIC SMALL LETTER EF --
++                 199  1   1075 -- CYRILLIC SMALL LETTER GHE --
++                 200  1   1093 -- CYRILLIC SMALL LETTER HA --
++                 201  1   1080 -- CYRILLIC SMALL LETTER I --
++                 202  1   1081 -- CYRILLIC SMALL LETTER SHORT I --
++                 203  1   1082 -- CYRILLIC SMALL LETTER KA --
++                 204  1   1083 -- CYRILLIC SMALL LETTER EL --
++                 205  1   1084 -- CYRILLIC SMALL LETTER EM --
++                 206  1   1085 -- CYRILLIC SMALL LETTER EN --
++                 207  1   1086 -- CYRILLIC SMALL LETTER O --
++                 208  1   1087 -- CYRILLIC SMALL LETTER PE --
++                 209  1   1103 -- CYRILLIC SMALL LETTER YA --
++                 210  1   1088 -- CYRILLIC SMALL LETTER ER --
++                 211  1   1089 -- CYRILLIC SMALL LETTER ES --
++                 212  1   1090 -- CYRILLIC SMALL LETTER TE --
++                 213  1   1091 -- CYRILLIC SMALL LETTER U --
++                 214  1   1078 -- CYRILLIC SMALL LETTER ZHE --
++                 215  1   1074 -- CYRILLIC SMALL LETTER VE --
++                 216  1   1100 -- CYRILLIC SMALL LETTER SOFT SIGN --
++                 217  1   1099 -- CYRILLIC SMALL LETTER YERU --
++                 218  1   1079 -- CYRILLIC SMALL LETTER ZE --
++                 219  1   1096 -- CYRILLIC SMALL LETTER SHA --
++                 220  1   1101 -- CYRILLIC SMALL LETTER E --
++                 222  1   1095 -- CYRILLIC SMALL LETTER CHE --
++                 221  1   1097 -- CYRILLIC SMALL LETTER SHCHA --
++                 223  1   1098 -- CYRILLIC SMALL LETTER HARD SIGN --
++                 224  1   1070 -- CYRILLIC CAPITAL LETTER YU --
++                 225  1   1040 -- CYRILLIC CAPITAL LETTER A --
++                 226  1   1041 -- CYRILLIC CAPITAL LETTER BE --
++                 227  1   1062 -- CYRILLIC CAPITAL LETTER TSE --
++                 228  1   1044 -- CYRILLIC CAPITAL LETTER DE --
++                 229  1   1045 -- CYRILLIC CAPITAL LETTER IE --
++                 230  1   1060 -- CYRILLIC CAPITAL LETTER EF --
++                 231  1   1043 -- CYRILLIC CAPITAL LETTER GHE --
++                 232  1   1061 -- CYRILLIC CAPITAL LETTER HA --
++                 233  1   1048 -- CYRILLIC CAPITAL LETTER I --
++                 234  1   1049 -- CYRILLIC CAPITAL LETTER SHORT I --
++                 235  1   1050 -- CYRILLIC CAPITAL LETTER KA --
++                 236  1   1051 -- CYRILLIC CAPITAL LETTER EL --
++                 237  1   1052 -- CYRILLIC CAPITAL LETTER EM --
++                 238  1   1053 -- CYRILLIC CAPITAL LETTER EN --
++                 239  1   1054 -- CYRILLIC CAPITAL LETTER O --
++                 240  1   1055 -- CYRILLIC CAPITAL LETTER PE --
++                 241  1   1071 -- CYRILLIC CAPITAL LETTER YA --
++                 242  1   1056 -- CYRILLIC CAPITAL LETTER ER --
++                 243  1   1057 -- CYRILLIC CAPITAL LETTER ES --
++                 244  1   1058 -- CYRILLIC CAPITAL LETTER TE --
++                 245  1   1059 -- CYRILLIC CAPITAL LETTER U --
++                 246  1   1046 -- CYRILLIC CAPITAL LETTER ZHE --
++                 247  1   1042 -- CYRILLIC CAPITAL LETTER VE --
++                 248  1   1068 -- CYRILLIC CAPITAL LETTER SOFT SIGN --
++                 249  1   1067 -- CYRILLIC CAPITAL LETTER YERU --
++                 250  1   1047 -- CYRILLIC CAPITAL LETTER ZE --
++                 251  1   1064 -- CYRILLIC CAPITAL LETTER SHA --
++                 252  1   1069 -- CYRILLIC CAPITAL LETTER E --
++                 253  1   1065 -- CYRILLIC CAPITAL LETTER SHCHA --
++                 254  1   1063 -- CYRILLIC CAPITAL LETTER CHE --
++                 255  1   1066 -- CYRILLIC CAPITAL LETTER HARD SIGN --
++
++CAPACITY
++        PUBLIC  "ISO 8879:1986//CAPACITY Reference//EN"
++
++SCOPE
++        DOCUMENT
++
++SYNTAX
++        SHUNCHAR CONTROLS
++
++        -- The Syntax-Reference Character Set is defined similarly to   --
++        -- the document character set.  It is, however, used            --
++        -- exclusively to define the concrete syntax.  The character    --
++        -- assignments within it are used only in the following         --
++        -- concrete syntax definitions.                                 --
++
++        -- The main function of the concrete syntax is to assign        --
++        -- meanings to characters: to make them name characters or      --
++        -- function characters, such as RE.  The meaning given in the   --
++        -- concrete syntax to each syntax-reference character is        --
++        -- assigned to the corresponding base character.  The document  --
++        -- character set then assigns these meanings to the characters  --
++        -- in the parsed document itself.                               --
++
++        BASESET  "ISO Registration Number 176//CHARSET
++                  ISO/IEC 10646-1:1993 UCS-2
++                  with implementation level 3//ESC 2/5 2/15 4/5"
++
++        DESCSET    0   9 UNUSED
++                   9   2      9
++                  11   2 UNUSED
++                  13   1     13
++                  14  18 UNUSED
++                  32  95     32
++                 127  42 UNUSED
++                 169   1    169         -- COPYRIGHT SIGN --
++                 170 855 UNUSED
++	        1025   1   1025         -- CYRILLIC CAPITAL LETTER IO --
++                1026  14 UNUSED
++                1040  64   1040         -- russian alphabet except io --
++                1104   1 UNUSED
++                1105   1   1105         -- CYRILLIC SMALL LETTER IO --
++
++        FUNCTION
++                RE              13      -- record end --
++                RS              10      -- record start --
++                SPACE           32      -- space --
++                TAB SEPCHAR      9      -- additional spec chars --
++
++        NAMING
++                -- Now declare that russian letters are valid chars to --
++                -- start names with and define case mapping as well.   --
++                -- Use TC for Extended Naming Rules (Annex J to 8879). --
++
++                LCNMSTRT 1072-1103 1105
++                UCNMSTRT 1040-1071 1025
++                LCNMCHAR  ".-"
++                UCNMCHAR  ".-"
++                NAMECASE  GENERAL YES
++                          ENTITY  NO
++
++        DELIM
++                GENERAL   SGMLREF
++                SHORTREF  SGMLREF
++
++        NAMES   SGMLREF
++
++        QUANTITY
++                SGMLREF
++                ATTCNT    99999999
++                ATTSPLEN  99999999
++                DTEMPLEN  24000
++                ENTLVL    99999999
++                GRPCNT    99999999
++                GRPGTCNT  99999999
++                GRPLVL    99999999
++                LITLEN    24000
++                NAMELEN   99999999
++                PILEN     24000
++                TAGLEN    99999999
++                TAGLVL    99999999
++
++-- End of SYNTAX --
++
++FEATURES -- feature usage declarations --
++
++        MINIMIZE
++                DATATAG  NO
++                OMITTAG  YES
++                RANK     YES
++                SHORTTAG YES
++
++        LINK
++                SIMPLE   YES 1000
++                IMPLICIT YES
++                EXPLICIT YES 1
++
++        OTHER
++                CONCUR  NO
++                SUBDOC  YES 99999999
++                FORMAL  YES
++
++-- End of FEATURES --
++
++APPINFO
++        NONE
++>
diff --git a/textproc/jade/pkg-plist b/textproc/jade/pkg-plist
index 57fd83c0c9b..a9cca50200c 100644
--- a/textproc/jade/pkg-plist
+++ b/textproc/jade/pkg-plist
@@ -135,6 +135,7 @@ include/sp/XcharMap.cxx
 include/sp/XcharMap.h
 include/sp/config.h
 include/sp/constant.h
+include/sp/koi8-r.h
 include/sp/macros.h
 include/sp/rtti.h
 include/sp/sptchar.h
author	kuriyama <kuriyama@FreeBSD.org>	2004-08-04 07:25:17 +0800
committer	kuriyama <kuriyama@FreeBSD.org>	2004-08-04 07:25:17 +0800
commit	dc76a42c492e7be9d38b59fee98cec45c3507e12 (patch)
tree	b4a7b1415e0e29fddae9f7329e7a06d058ddaa42 /textproc
parent	d6b310e8e39aee9725910036554d2e88b42e63fe (diff)
download	freebsd-ports-graphics-dc76a42c492e7be9d38b59fee98cec45c3507e12.tar.gz freebsd-ports-graphics-dc76a42c492e7be9d38b59fee98cec45c3507e12.tar.zst freebsd-ports-graphics-dc76a42c492e7be9d38b59fee98cec45c3507e12.zip