From b12649a071702c28df0ed98965553f148d3c6e19 Mon Sep 17 00:00:00 2001 From: Henning Guenther Date: Sat, 29 Aug 2009 05:54:23 -0700 Subject: [PATCH] Allow for documentation on mapping files Ignore-this: 5bcfdd21c16464e29fb7498dd6ec8915 Use the first commented lines in each mapping file as the docstring for the whole module. darcs-hash:20090829125423-a4fee-c99efd2cccdaa24d3b283aca0d43cd7ff6970c7f --- Data/Encoding/CP1250.mapping | 20 +----- Data/Encoding/CP1251.mapping | 18 ------ Data/Encoding/CP1252.mapping | 18 ------ Data/Encoding/CP1253.mapping | 18 ------ Data/Encoding/CP1254.mapping | 18 ------ Data/Encoding/CP1255.mapping | 18 ------ Data/Encoding/CP1256.mapping | 18 ------ Data/Encoding/CP1257.mapping | 18 ------ Data/Encoding/CP1258.mapping | 18 ------ Data/Encoding/CP437.mapping | 17 ----- Data/Encoding/CP737.mapping | 17 ----- Data/Encoding/CP775.mapping | 17 ----- Data/Encoding/CP850.mapping | 17 ----- Data/Encoding/CP852.mapping | 17 ----- Data/Encoding/CP855.mapping | 17 ----- Data/Encoding/CP857.mapping | 17 ----- Data/Encoding/CP860.mapping | 17 ----- Data/Encoding/CP861.mapping | 17 ----- Data/Encoding/CP862.mapping | 17 ----- Data/Encoding/CP863.mapping | 17 ----- Data/Encoding/CP864.mapping | 17 ----- Data/Encoding/CP865.mapping | 17 ----- Data/Encoding/CP866.mapping | 17 ----- Data/Encoding/CP869.mapping | 17 ----- Data/Encoding/CP874.mapping | 18 ------ Data/Encoding/JISX0201.mapping | 50 -------------- Data/Encoding/JISX0208.mapping2 | 72 --------------------- Data/Encoding/JISX0212.mapping2 | 86 ------------------------- Data/Encoding/MacOSRoman.mapping | 93 ++------------------------- Data/Encoding/Preprocessor/Mapping.hs | 30 ++++++--- 30 files changed, 27 insertions(+), 741 deletions(-) diff --git a/Data/Encoding/CP1250.mapping b/Data/Encoding/CP1250.mapping index 6bfab93..ce2f862 100644 --- a/Data/Encoding/CP1250.mapping +++ b/Data/Encoding/CP1250.mapping @@ -1,21 +1,5 @@ -# -# Name: cp1250 to Unicode table -# Unicode version: 2.0 -# 
Table version: 2.01 -# Table format: Format A -# Date: 04/15/98 -# -# Contact: Shawn.Steele@microsoft.com -# -# General notes: none -# -# Format: Three tab-separated columns -# Column #1 is the cp1250 code (in hex) -# Column #2 is the Unicode (in hex as 0xXXXX) -# Column #3 is the Unicode name (follows a comment sign, '#') -# -# The entries are in cp1250 order -# +#Implements the Windows-1250 encoding. +#For details, refer to . 0x00 0x0000 #NULL 0x01 0x0001 #START OF HEADING 0x02 0x0002 #START OF TEXT diff --git a/Data/Encoding/CP1251.mapping b/Data/Encoding/CP1251.mapping index 4d9b355..2d12f77 100644 --- a/Data/Encoding/CP1251.mapping +++ b/Data/Encoding/CP1251.mapping @@ -1,21 +1,3 @@ -# -# Name: cp1251 to Unicode table -# Unicode version: 2.0 -# Table version: 2.01 -# Table format: Format A -# Date: 04/15/98 -# -# Contact: Shawn.Steele@microsoft.com -# -# General notes: none -# -# Format: Three tab-separated columns -# Column #1 is the cp1251 code (in hex) -# Column #2 is the Unicode (in hex as 0xXXXX) -# Column #3 is the Unicode name (follows a comment sign, '#') -# -# The entries are in cp1251 order -# 0x00 0x0000 #NULL 0x01 0x0001 #START OF HEADING 0x02 0x0002 #START OF TEXT diff --git a/Data/Encoding/CP1252.mapping b/Data/Encoding/CP1252.mapping index 8ff4b20..ec2b27d 100644 --- a/Data/Encoding/CP1252.mapping +++ b/Data/Encoding/CP1252.mapping @@ -1,21 +1,3 @@ -# -# Name: cp1252 to Unicode table -# Unicode version: 2.0 -# Table version: 2.01 -# Table format: Format A -# Date: 04/15/98 -# -# Contact: Shawn.Steele@microsoft.com -# -# General notes: none -# -# Format: Three tab-separated columns -# Column #1 is the cp1252 code (in hex) -# Column #2 is the Unicode (in hex as 0xXXXX) -# Column #3 is the Unicode name (follows a comment sign, '#') -# -# The entries are in cp1252 order -# 0x00 0x0000 #NULL 0x01 0x0001 #START OF HEADING 0x02 0x0002 #START OF TEXT diff --git a/Data/Encoding/CP1253.mapping b/Data/Encoding/CP1253.mapping index 20a55b0..41b4655 100644 
--- a/Data/Encoding/CP1253.mapping +++ b/Data/Encoding/CP1253.mapping @@ -1,21 +1,3 @@ -# -# Name: cp1253 to Unicode table -# Unicode version: 2.0 -# Table version: 2.01 -# Table format: Format A -# Date: 04/15/98 -# -# Contact: Shawn.Steele@microsoft.com -# -# General notes: none -# -# Format: Three tab-separated columns -# Column #1 is the cp1253 code (in hex) -# Column #2 is the Unicode (in hex as 0xXXXX) -# Column #3 is the Unicode name (follows a comment sign, '#') -# -# The entries are in cp1253 order -# 0x00 0x0000 #NULL 0x01 0x0001 #START OF HEADING 0x02 0x0002 #START OF TEXT diff --git a/Data/Encoding/CP1254.mapping b/Data/Encoding/CP1254.mapping index 987ed98..5e05683 100644 --- a/Data/Encoding/CP1254.mapping +++ b/Data/Encoding/CP1254.mapping @@ -1,21 +1,3 @@ -# -# Name: cp1254 to Unicode table -# Unicode version: 2.0 -# Table version: 2.01 -# Table format: Format A -# Date: 04/15/98 -# -# Contact: Shawn.Steele@microsoft.com -# -# General notes: none -# -# Format: Three tab-separated columns -# Column #1 is the cp1254 code (in hex) -# Column #2 is the Unicode (in hex as 0xXXXX) -# Column #3 is the Unicode name (follows a comment sign, '#') -# -# The entries are in cp1254 order -# 0x00 0x0000 #NULL 0x01 0x0001 #START OF HEADING 0x02 0x0002 #START OF TEXT diff --git a/Data/Encoding/CP1255.mapping b/Data/Encoding/CP1255.mapping index 585f993..eecf0ca 100644 --- a/Data/Encoding/CP1255.mapping +++ b/Data/Encoding/CP1255.mapping @@ -1,21 +1,3 @@ -# -# Name: cp1255 to Unicode table -# Unicode version: 2.0 -# Table version: 2.01 -# Table format: Format A -# Date: 1/7/2000 -# -# Contact: Shawn.Steele@microsoft.com -# -# General notes: none -# -# Format: Three tab-separated columns -# Column #1 is the cp1255 code (in hex) -# Column #2 is the Unicode (in hex as 0xXXXX) -# Column #3 is the Unicode name (follows a comment sign, '#') -# -# The entries are in cp1255 order -# 0x00 0x0000 #NULL 0x01 0x0001 #START OF HEADING 0x02 0x0002 #START OF TEXT diff --git 
a/Data/Encoding/CP1256.mapping b/Data/Encoding/CP1256.mapping index 244dcce..8a9956f 100644 --- a/Data/Encoding/CP1256.mapping +++ b/Data/Encoding/CP1256.mapping @@ -1,21 +1,3 @@ -# -# Name: cp1256 to Unicode table -# Unicode version: 2.1 -# Table version: 2.01 -# Table format: Format A -# Date: 01/5/99 -# -# Contact: Shawn.Steele@microsoft.com -# -# General notes: none -# -# Format: Three tab-separated columns -# Column #1 is the cp1256 code (in hex) -# Column #2 is the Unicode (in hex as 0xXXXX) -# Column #3 is the Unicode name (follows a comment sign, '#') -# -# The entries are in cp1256 order -# 0x00 0x0000 #NULL 0x01 0x0001 #START OF HEADING 0x02 0x0002 #START OF TEXT diff --git a/Data/Encoding/CP1257.mapping b/Data/Encoding/CP1257.mapping index 0dc475e..d8e7f0d 100644 --- a/Data/Encoding/CP1257.mapping +++ b/Data/Encoding/CP1257.mapping @@ -1,21 +1,3 @@ -# -# Name: cp1257 to Unicode table -# Unicode version: 2.0 -# Table version: 2.01 -# Table format: Format A -# Date: 04/15/98 -# -# Contact: Shawn.Steele@microsoft.com -# -# General notes: none -# -# Format: Three tab-separated columns -# Column #1 is the cp1257 code (in hex) -# Column #2 is the Unicode (in hex as 0xXXXX) -# Column #3 is the Unicode name (follows a comment sign, '#') -# -# The entries are in cp1257 order -# 0x00 0x0000 #NULL 0x01 0x0001 #START OF HEADING 0x02 0x0002 #START OF TEXT diff --git a/Data/Encoding/CP1258.mapping b/Data/Encoding/CP1258.mapping index f402b34..4dbdc38 100644 --- a/Data/Encoding/CP1258.mapping +++ b/Data/Encoding/CP1258.mapping @@ -1,21 +1,3 @@ -# -# Name: cp1258 to Unicode table -# Unicode version: 2.0 -# Table version: 2.01 -# Table format: Format A -# Date: 04/15/98 -# -# Contact: Shawn.Steele@microsoft.com -# -# General notes: none -# -# Format: Three tab-separated columns -# Column #1 is the cp1258 code (in hex) -# Column #2 is the Unicode (in hex as 0xXXXX) -# Column #3 is the Unicode name (follows a comment sign, '#') -# -# The entries are in cp1258 order -# 0x00 
0x0000 #NULL 0x01 0x0001 #START OF HEADING 0x02 0x0002 #START OF TEXT diff --git a/Data/Encoding/CP437.mapping b/Data/Encoding/CP437.mapping index 5fb2bef..f667db4 100644 --- a/Data/Encoding/CP437.mapping +++ b/Data/Encoding/CP437.mapping @@ -1,20 +1,3 @@ -# -# Name: cp437_DOSLatinUS to Unicode table -# Unicode version: 2.0 -# Table version: 2.00 -# Table format: Format A -# Date: 04/24/96 -# Contact: Shawn.Steele@microsoft.com -# -# General notes: none -# -# Format: Three tab-separated columns -# Column #1 is the cp437_DOSLatinUS code (in hex) -# Column #2 is the Unicode (in hex as 0xXXXX) -# Column #3 is the Unicode name (follows a comment sign, '#') -# -# The entries are in cp437_DOSLatinUS order -# 0x00 0x0000 #NULL 0x01 0x0001 #START OF HEADING 0x02 0x0002 #START OF TEXT diff --git a/Data/Encoding/CP737.mapping b/Data/Encoding/CP737.mapping index defeae9..fbe7a00 100644 --- a/Data/Encoding/CP737.mapping +++ b/Data/Encoding/CP737.mapping @@ -1,20 +1,3 @@ -# -# Name: cp737_DOSGreek to Unicode table -# Unicode version: 2.0 -# Table version: 2.00 -# Table format: Format A -# Date: 04/24/96 -# Contact: Shawn.Steele@microsoft.com -# -# General notes: none -# -# Format: Three tab-separated columns -# Column #1 is the cp737_DOSGreek code (in hex) -# Column #2 is the Unicode (in hex as 0xXXXX) -# Column #3 is the Unicode name (follows a comment sign, '#') -# -# The entries are in cp737_DOSGreek order -# 0x00 0x0000 #NULL 0x01 0x0001 #START OF HEADING 0x02 0x0002 #START OF TEXT diff --git a/Data/Encoding/CP775.mapping b/Data/Encoding/CP775.mapping index 677ae8c..08cb8e7 100644 --- a/Data/Encoding/CP775.mapping +++ b/Data/Encoding/CP775.mapping @@ -1,20 +1,3 @@ -# -# Name: cp775_DOSBaltRim to Unicode table -# Unicode version: 2.0 -# Table version: 2.00 -# Table format: Format A -# Date: 04/24/96 -# Contact: Shawn.Steele@microsoft.com -# -# General notes: none -# -# Format: Three tab-separated columns -# Column #1 is the cp775_DOSBaltRim code (in hex) -# Column #2 is the 
Unicode (in hex as 0xXXXX) -# Column #3 is the Unicode name (follows a comment sign, '#') -# -# The entries are in cp775_DOSBaltRim order -# 0x00 0x0000 #NULL 0x01 0x0001 #START OF HEADING 0x02 0x0002 #START OF TEXT diff --git a/Data/Encoding/CP850.mapping b/Data/Encoding/CP850.mapping index 93046f3..77726f2 100644 --- a/Data/Encoding/CP850.mapping +++ b/Data/Encoding/CP850.mapping @@ -1,20 +1,3 @@ -# -# Name: cp850_DOSLatin1 to Unicode table -# Unicode version: 2.0 -# Table version: 2.00 -# Table format: Format A -# Date: 04/24/96 -# Contact: Shawn.Steele@microsoft.com -# -# General notes: none -# -# Format: Three tab-separated columns -# Column #1 is the cp850_DOSLatin1 code (in hex) -# Column #2 is the Unicode (in hex as 0xXXXX) -# Column #3 is the Unicode name (follows a comment sign, '#') -# -# The entries are in cp850_DOSLatin1 order -# 0x00 0x0000 #NULL 0x01 0x0001 #START OF HEADING 0x02 0x0002 #START OF TEXT diff --git a/Data/Encoding/CP852.mapping b/Data/Encoding/CP852.mapping index b236c1f..89759d6 100644 --- a/Data/Encoding/CP852.mapping +++ b/Data/Encoding/CP852.mapping @@ -1,20 +1,3 @@ -# -# Name: cp852_DOSLatin2 to Unicode table -# Unicode version: 2.0 -# Table version: 2.00 -# Table format: Format A -# Date: 04/24/96 -# Contact: Shawn.Steele@microsoft.com -# -# General notes: none -# -# Format: Three tab-separated columns -# Column #1 is the cp852_DOSLatin2 code (in hex) -# Column #2 is the Unicode (in hex as 0xXXXX) -# Column #3 is the Unicode name (follows a comment sign, '#') -# -# The entries are in cp852_DOSLatin2 order -# 0x00 0x0000 #NULL 0x01 0x0001 #START OF HEADING 0x02 0x0002 #START OF TEXT diff --git a/Data/Encoding/CP855.mapping b/Data/Encoding/CP855.mapping index 025dec6..4460431 100644 --- a/Data/Encoding/CP855.mapping +++ b/Data/Encoding/CP855.mapping @@ -1,20 +1,3 @@ -# -# Name: cp855_DOSCyrillic to Unicode table -# Unicode version: 2.0 -# Table version: 2.00 -# Table format: Format A -# Date: 04/24/96 -# Contact: 
Shawn.Steele@microsoft.com -# -# General notes: none -# -# Format: Three tab-separated columns -# Column #1 is the cp855_DOSCyrillic code (in hex) -# Column #2 is the Unicode (in hex as 0xXXXX) -# Column #3 is the Unicode name (follows a comment sign, '#') -# -# The entries are in cp855_DOSCyrillic order -# 0x00 0x0000 #NULL 0x01 0x0001 #START OF HEADING 0x02 0x0002 #START OF TEXT diff --git a/Data/Encoding/CP857.mapping b/Data/Encoding/CP857.mapping index 880736d..396010d 100644 --- a/Data/Encoding/CP857.mapping +++ b/Data/Encoding/CP857.mapping @@ -1,20 +1,3 @@ -# -# Name: cp857_DOSTurkish to Unicode table -# Unicode version: 2.0 -# Table version: 2.00 -# Table format: Format A -# Date: 04/24/96 -# Contact: Shawn.Steele@microsoft.com -# -# General notes: none -# -# Format: Three tab-separated columns -# Column #1 is the cp857_DOSTurkish code (in hex) -# Column #2 is the Unicode (in hex as 0xXXXX) -# Column #3 is the Unicode name (follows a comment sign, '#') -# -# The entries are in cp857_DOSTurkish order -# 0x00 0x0000 #NULL 0x01 0x0001 #START OF HEADING 0x02 0x0002 #START OF TEXT diff --git a/Data/Encoding/CP860.mapping b/Data/Encoding/CP860.mapping index f8fabad..bc2819d 100644 --- a/Data/Encoding/CP860.mapping +++ b/Data/Encoding/CP860.mapping @@ -1,20 +1,3 @@ -# -# Name: cp860_DOSPortuguese to Unicode table -# Unicode version: 2.0 -# Table version: 2.00 -# Table format: Format A -# Date: 04/24/96 -# Contact: Shawn.Steele@microsoft.com -# -# General notes: none -# -# Format: Three tab-separated columns -# Column #1 is the cp860_DOSPortuguese code (in hex) -# Column #2 is the Unicode (in hex as 0xXXXX) -# Column #3 is the Unicode name (follows a comment sign, '#') -# -# The entries are in cp860_DOSPortuguese order -# 0x00 0x0000 #NULL 0x01 0x0001 #START OF HEADING 0x02 0x0002 #START OF TEXT diff --git a/Data/Encoding/CP861.mapping b/Data/Encoding/CP861.mapping index c474458..e99ac4c 100644 --- a/Data/Encoding/CP861.mapping +++ b/Data/Encoding/CP861.mapping @@ 
-1,20 +1,3 @@ -# -# Name: cp861_DOSIcelandic to Unicode table -# Unicode version: 2.0 -# Table version: 2.00 -# Table format: Format A -# Date: 04/24/96 -# Contact: Shawn.Steele@microsoft.com -# -# General notes: none -# -# Format: Three tab-separated columns -# Column #1 is the cp861_DOSIcelandic code (in hex) -# Column #2 is the Unicode (in hex as 0xXXXX) -# Column #3 is the Unicode name (follows a comment sign, '#') -# -# The entries are in cp861_DOSIcelandic order -# 0x00 0x0000 #NULL 0x01 0x0001 #START OF HEADING 0x02 0x0002 #START OF TEXT diff --git a/Data/Encoding/CP862.mapping b/Data/Encoding/CP862.mapping index d6d1d86..7464642 100644 --- a/Data/Encoding/CP862.mapping +++ b/Data/Encoding/CP862.mapping @@ -1,20 +1,3 @@ -# -# Name: cp862_DOSHebrew to Unicode table -# Unicode version: 2.0 -# Table version: 2.00 -# Table format: Format A -# Date: 04/24/96 -# Contact: Shawn.Steele@microsoft.com -# -# General notes: none -# -# Format: Three tab-separated columns -# Column #1 is the cp862_DOSHebrew code (in hex) -# Column #2 is the Unicode (in hex as 0xXXXX) -# Column #3 is the Unicode name (follows a comment sign, '#') -# -# The entries are in cp862_DOSHebrew order -# 0x00 0x0000 #NULL 0x01 0x0001 #START OF HEADING 0x02 0x0002 #START OF TEXT diff --git a/Data/Encoding/CP863.mapping b/Data/Encoding/CP863.mapping index 6af0231..cc27992 100644 --- a/Data/Encoding/CP863.mapping +++ b/Data/Encoding/CP863.mapping @@ -1,20 +1,3 @@ -# -# Name: cp863_DOSCanadaF to Unicode table -# Unicode version: 2.0 -# Table version: 2.00 -# Table format: Format A -# Date: 04/24/96 -# Contact: Shawn.Steele@microsoft.com -# -# General notes: none -# -# Format: Three tab-separated columns -# Column #1 is the cp863_DOSCanadaF code (in hex) -# Column #2 is the Unicode (in hex as 0xXXXX) -# Column #3 is the Unicode name (follows a comment sign, '#') -# -# The entries are in cp863_DOSCanadaF order -# 0x00 0x0000 #NULL 0x01 0x0001 #START OF HEADING 0x02 0x0002 #START OF TEXT diff --git 
a/Data/Encoding/CP864.mapping b/Data/Encoding/CP864.mapping index f80fa2c..9e31491 100644 --- a/Data/Encoding/CP864.mapping +++ b/Data/Encoding/CP864.mapping @@ -1,20 +1,3 @@ -# -# Name: cp864_DOSArabic to Unicode table -# Unicode version: 2.0 -# Table version: 2.00 -# Table format: Format A -# Date: 04/24/96 -# Contact: Shawn.Steele@microsoft.com -# -# General notes: none -# -# Format: Three tab-separated columns -# Column #1 is the cp864_DOSArabic code (in hex) -# Column #2 is the Unicode (in hex as 0xXXXX) -# Column #3 is the Unicode name (follows a comment sign, '#') -# -# The entries are in cp864_DOSArabic order -# 0x00 0x0000 #NULL 0x01 0x0001 #START OF HEADING 0x02 0x0002 #START OF TEXT diff --git a/Data/Encoding/CP865.mapping b/Data/Encoding/CP865.mapping index 0b32e37..c66dc52 100644 --- a/Data/Encoding/CP865.mapping +++ b/Data/Encoding/CP865.mapping @@ -1,20 +1,3 @@ -# -# Name: cp865_DOSNordic to Unicode table -# Unicode version: 2.0 -# Table version: 2.00 -# Table format: Format A -# Date: 04/24/96 -# Contact: Shawn.Steele@microsoft.com -# -# General notes: none -# -# Format: Three tab-separated columns -# Column #1 is the cp865_DOSNordic code (in hex) -# Column #2 is the Unicode (in hex as 0xXXXX) -# Column #3 is the Unicode name (follows a comment sign, '#') -# -# The entries are in cp865_DOSNordic order -# 0x00 0x0000 #NULL 0x01 0x0001 #START OF HEADING 0x02 0x0002 #START OF TEXT diff --git a/Data/Encoding/CP866.mapping b/Data/Encoding/CP866.mapping index 51c996b..757717e 100644 --- a/Data/Encoding/CP866.mapping +++ b/Data/Encoding/CP866.mapping @@ -1,20 +1,3 @@ -# -# Name: cp866_DOSCyrillicRussian to Unicode table -# Unicode version: 2.0 -# Table version: 2.00 -# Table format: Format A -# Date: 04/24/96 -# Contact: Shawn.Steele@microsoft.com -# -# General notes: none -# -# Format: Three tab-separated columns -# Column #1 is the cp866_DOSCyrillicRussian code (in hex) -# Column #2 is the Unicode (in hex as 0xXXXX) -# Column #3 is the Unicode name 
(follows a comment sign, '#') -# -# The entries are in cp866_DOSCyrillicRussian order -# 0x00 0x0000 #NULL 0x01 0x0001 #START OF HEADING 0x02 0x0002 #START OF TEXT diff --git a/Data/Encoding/CP869.mapping b/Data/Encoding/CP869.mapping index 1d0f9b9..3531742 100644 --- a/Data/Encoding/CP869.mapping +++ b/Data/Encoding/CP869.mapping @@ -1,20 +1,3 @@ -# -# Name: cp869_DOSGreek2 to Unicode table -# Unicode version: 2.0 -# Table version: 2.00 -# Table format: Format A -# Date: 04/24/96 -# Contact: Shawn.Steele@microsoft.com -# -# General notes: none -# -# Format: Three tab-separated columns -# Column #1 is the cp869_DOSGreek2 code (in hex) -# Column #2 is the Unicode (in hex as 0xXXXX) -# Column #3 is the Unicode name (follows a comment sign, '#') -# -# The entries are in cp869_DOSGreek2 order -# 0x00 0x0000 #NULL 0x01 0x0001 #START OF HEADING 0x02 0x0002 #START OF TEXT diff --git a/Data/Encoding/CP874.mapping b/Data/Encoding/CP874.mapping index cbd0cdd..a8e3010 100644 --- a/Data/Encoding/CP874.mapping +++ b/Data/Encoding/CP874.mapping @@ -1,21 +1,3 @@ -# -# Name: cp874 to Unicode table -# Unicode version: 2.0 -# Table version: 2.00 -# Table format: Format A -# Date: 04/15/98 -# -# Contact: Shawn.Steele@microsoft.com -# -# General notes: none -# -# Format: Three tab-separated columns -# Column #1 is the cp874 code (in hex) -# Column #2 is the Unicode (in hex as 0xXXXX) -# Column #3 is the Unicode name (follows a comment sign, '#') -# -# The entries are in cp874 order -# 0x00 0x0000 #NULL 0x01 0x0001 #START OF HEADING 0x02 0x0002 #START OF TEXT diff --git a/Data/Encoding/JISX0201.mapping b/Data/Encoding/JISX0201.mapping index 5525a68..0534449 100644 --- a/Data/Encoding/JISX0201.mapping +++ b/Data/Encoding/JISX0201.mapping @@ -1,53 +1,3 @@ -# -# Name: JIS X 0201 (1976) to Unicode 1.1 Table -# Unicode version: 1.1 -# Table version: 0.9 -# Table format: Format A -# Date: 8 March 1994 -# -# Copyright (c) 1991-1994 Unicode, Inc. All Rights reserved. 
-# -# This file is provided as-is by Unicode, Inc. (The Unicode Consortium). -# No claims are made as to fitness for any particular purpose. No -# warranties of any kind are expressed or implied. The recipient -# agrees to determine applicability of information provided. If this -# file has been provided on magnetic media by Unicode, Inc., the sole -# remedy for any claim will be exchange of defective media within 90 -# days of receipt. -# -# Recipient is granted the right to make copies in any form for -# internal distribution and to freely use the information supplied -# in the creation of products supporting Unicode. Unicode, Inc. -# specifically excludes the right to re-distribute this file directly -# to third parties or other organizations whether for profit or not. -# -# General notes: -# -# -# This table contains one set of mappings from JIS X 0201 into Unicode. -# Note that these data are *possible* mappings only and may not be the -# same as those used by actual products, nor may they be the best suited -# for all uses. For more information on the mappings between various code -# pages incorporating the repertoire of JIS X 0201 and Unicode, consult the -# VENDORS mapping data. Normative information on the mapping between -# JIS X 0201 and Unicode may be found in the Unihan.txt file in the -# latest Unicode Character Database. -# -# If you have carefully considered the fact that the mappings in -# this table are only one possible set of mappings between JIS X 0201 and -# Unicode and have no normative status, but still feel that you -# have located an error in the table that requires fixing, you may -# report any such error to errata@unicode.org. 
-# -# -# Format: Three tab-separated columns -# Column #1 is the shift JIS code (in hex as 0xXX) -# Column #2 is the Unicode (in hex as 0xXXXX) -# Column #3 the Unicode (ISO 10646) name (follows a comment sign) -# -# The entries are in JIS order -# -# 0x20 0x0020 # SPACE 0x21 0x0021 # EXCLAMATION MARK 0x22 0x0022 # QUOTATION MARK diff --git a/Data/Encoding/JISX0208.mapping2 b/Data/Encoding/JISX0208.mapping2 index c774e53..9dfa5a8 100644 --- a/Data/Encoding/JISX0208.mapping2 +++ b/Data/Encoding/JISX0208.mapping2 @@ -1,75 +1,3 @@ -# -# Name: JIS X 0208 (1990) to Unicode -# Unicode version: 1.1 -# Table version: 0.9 -# Table format: Format A -# Date: 8 March 1994 -# -# Copyright (c) 1991-1994 Unicode, Inc. All Rights reserved. -# -# This file is provided as-is by Unicode, Inc. (The Unicode Consortium). -# No claims are made as to fitness for any particular purpose. No -# warranties of any kind are expressed or implied. The recipient -# agrees to determine applicability of information provided. If this -# file has been provided on magnetic media by Unicode, Inc., the sole -# remedy for any claim will be exchange of defective media within 90 -# days of receipt. -# -# Recipient is granted the right to make copies in any form for -# internal distribution and to freely use the information supplied -# in the creation of products supporting Unicode. Unicode, Inc. -# specifically excludes the right to re-distribute this file directly -# to third parties or other organizations whether for profit or not. -# -# General notes: -# -# -# This table contains one set of mappings from JIS X 0208 (1990) into Unicode. -# Note that these data are *possible* mappings only and may not be the -# same as those used by actual products, nor may they be the best suited -# for all uses. For more information on the mappings between various code -# pages incorporating the repertoire of JIS X 0208 (1990) and Unicode, consult the -# VENDORS mapping data. 
Normative information on the mapping between -# JIS X 0208 (1990) and Unicode may be found in the Unihan.txt file in the -# latest Unicode Character Database. -# -# If you have carefully considered the fact that the mappings in -# this table are only one possible set of mappings between JIS X 0208 (1990) -# and Unicode and have no normative status, but still feel that you -# have located an error in the table that requires fixing, you may -# report any such error to errata@unicode.org. -# -# -# Format: Four tab-separated columns -# Column #1 is the JIS X 0208 code (in hex as 0xXXXX) -# Column #2 is the Unicode (in hex as 0xXXXX) -# Column #3 the Unicode name (follows a comment sign, '#') -# The official names for Unicode characters U+4E00 -# to U+9FA5, inclusive, is "CJK UNIFIED IDEOGRAPH-XXXX", -# where XXXX is the code point. Including all these -# names in this file increases its size substantially -# and needlessly. The token "<CJK>" is used for the -# name of these characters. If necessary, it can be -# expanded algorithmically by a parser or editor. -# -# The entries are in JIS X 0208 order -# -# The following algorithms can be used to change the hex form -# of JIS 0208 to other standard forms: -# -# To change hex to EUC form, add 0x8080 -# To change hex to kuten form, first subtract 0x2020. Then -# the high and low bytes correspond to the ku and ten of -# the kuten form. For example, 0x2121 -> 0x0101 -> 0101; -# 0x7426 -> 0x5406 -> 8406 -# -# The kanji mappings are a normative part of ISO/IEC 10646.
The -# non-kanji mappings are provisional, pending definition of -# official mappings by Japanese standards bodies -# -# Any comments or problems, contact -# -# 0x2121 0x3000 # IDEOGRAPHIC SPACE 0x2122 0x3001 # IDEOGRAPHIC COMMA 0x2123 0x3002 # IDEOGRAPHIC FULL STOP diff --git a/Data/Encoding/JISX0212.mapping2 b/Data/Encoding/JISX0212.mapping2 index c1ea9f3..c7bab40 100644 --- a/Data/Encoding/JISX0212.mapping2 +++ b/Data/Encoding/JISX0212.mapping2 @@ -1,89 +1,3 @@ -# -# Name: JIS X 0212 (1990) to Unicode -# Unicode version: 1.1 -# Table version: 0.9 -# Table format: Format A -# Date: 8 March 1994 -# -# Copyright (c) 1991-1994 Unicode, Inc. All Rights reserved. -# -# This file is provided as-is by Unicode, Inc. (The Unicode Consortium). -# No claims are made as to fitness for any particular purpose. No -# warranties of any kind are expressed or implied. The recipient -# agrees to determine applicability of information provided. If this -# file has been provided on magnetic media by Unicode, Inc., the sole -# remedy for any claim will be exchange of defective media within 90 -# days of receipt. -# -# Recipient is granted the right to make copies in any form for -# internal distribution and to freely use the information supplied -# in the creation of products supporting Unicode. Unicode, Inc. -# specifically excludes the right to re-distribute this file directly -# to third parties or other organizations whether for profit or not. -# -# General notes: -# -# -# This table contains one set of mappings from JIS X 0212 into Unicode. -# Note that these data are *possible* mappings only and may not be the -# same as those used by actual products, nor may they be the best suited -# for all uses. For more information on the mappings between various code -# pages incorporating the repertoire of JIS X 0212 and Unicode, consult the -# VENDORS mapping data. 
Normative information on the mapping between -# JIS X 0212 and Unicode may be found in the Unihan.txt file in the -# latest Unicode Character Database. -# -# If you have carefully considered the fact that the mappings in -# this table are only one possible set of mappings between JIS X 0212 and -# Unicode and have no normative status, but still feel that you -# have located an error in the table that requires fixing, you may -# report any such error to errata@unicode.org. -# -# -# Format: Three tab-separated columns -# Column #1 is the JIS X 0212 code (in hex as 0xXXXX) -# Column #2 is the Unicode (in hex as 0xXXXX) -# Column #3 the Unicode name (follows a comment sign, '#') -# The official names for Unicode characters U+4E00 -# to U+9FA5, inclusive, is "CJK UNIFIED IDEOGRAPH-XXXX", -# where XXXX is the code point. Including all these -# names in this file increases its size substantially -# and needlessly. The token "<CJK>" is used for the -# name of these characters. If necessary, it can be -# expanded algorithmically by a parser or editor. -# -# The entries are in JIS X 0212 order -# -# The following algorithms can be used to change the hex form -# of JIS 0212 to other standard forms: -# -# To change hex to EUC form, add 0x8080 -# To change hex to kuten form, first subtract 0x2020. Then -# the high and low bytes correspond to the ku and ten of -# the kuten form. For example, 0x2121 -> 0x0101 -> 0101; -# 0x6D63 -> 0x4D43 -> 7767 -# -# The kanji mappings are a normative part of ISO/IEC 10646. The -# non-kanji mappings are provisional, pending definition of -# official mappings by Japanese standards bodies -# -# Any comments or problems, contact -# -# Notes: -# -# 1. JIS X 0212 apparently unified the following two symbols -# into a single character at 0x2922: -# -# LATIN CAPITAL LETTER D WITH STROKE -# LATIN CAPITAL LETTER ETH -# -# However, JIS X 0212 maintains the distinction between -# the lowercase forms of these two elements at 0x2942 and 0x2943.
-# Given the structre of these JIS encodings, it is clear that -# 0x2922 and 0x2942 are intended to be a capital/small pair. -# Consequently, in the Unicode mapping, 0x2922 is treated as -# LATIN CAPITAL LETTER D WITH STROKE. -# 0x222F 0x02D8 # BREVE 0x2230 0x02C7 # CARON (Mandarin Chinese third tone) 0x2231 0x00B8 # CEDILLA diff --git a/Data/Encoding/MacOSRoman.mapping b/Data/Encoding/MacOSRoman.mapping index 5b3b8b4..969ada7 100644 --- a/Data/Encoding/MacOSRoman.mapping +++ b/Data/Encoding/MacOSRoman.mapping @@ -1,83 +1,4 @@ -#======================================================================= -# File name: ROMAN.TXT -# -# Contents: Map (external version) from Mac OS Roman -# character set to Unicode 2.1 and later. -# -# Copyright: (c) 1994-2002, 2005 by Apple Computer, Inc., all rights -# reserved. -# -# Contact: charsets@apple.com -# -# Changes: -# -# c02 2005-Apr-05 Update header comments. Matches internal xml -# and Text Encoding Converter 2.0. -# b4,c1 2002-Dec-19 Update URLs, notes. Matches internal -# utom. -# b03 1999-Sep-22 Update contact e-mail address. Matches -# internal utom, ufrm, and Text -# Encoding Converter version 1.5. -# b02 1998-Aug-18 Encoding changed for Mac OS 8.5; change -# mapping of 0xDB from CURRENCY SIGN to -# EURO SIGN. Matches internal utom, -# ufrm. -# n08 1998-Feb-05 Minor update to header comments -# n06 1997-Dec-14 Add warning about future changes to 0xDB -# from CURRENCY SIGN to EURO SIGN. Clarify -# some header information -# n04 1997-Dec-01 Update to match internal utom, ufrm: -# Change standard mapping for 0xBD from U+2126 -# to its canonical decomposition, U+03A9. -# n03 1995-Apr-15 First version (after fixing some typos). -# Matches internal ufrm. -# -# Standard header: -# ---------------- -# -# Apple, the Apple logo, and Macintosh are trademarks of Apple -# Computer, Inc., registered in the United States and other countries. -# Unicode is a trademark of Unicode Inc. 
For the sake of brevity, -# throughout this document, "Macintosh" can be used to refer to -# Macintosh computers and "Unicode" can be used to refer to the -# Unicode standard. -# -# Apple Computer, Inc. ("Apple") makes no warranty or representation, -# either express or implied, with respect to this document and the -# included data, its quality, accuracy, or fitness for a particular -# purpose. In no event will Apple be liable for direct, indirect, -# special, incidental, or consequential damages resulting from any -# defect or inaccuracy in this document or the included data. -# -# These mapping tables and character lists are subject to change. -# The latest tables should be available from the following: -# -# -# -# For general information about Mac OS encodings and these mapping -# tables, see the file "README.TXT". -# -# Format: -# ------- -# -# Three tab-separated columns; -# '#' begins a comment which continues to the end of the line. -# Column #1 is the Mac OS Roman code (in hex as 0xNN) -# Column #2 is the corresponding Unicode (in hex as 0xNNNN) -# Column #3 is a comment containing the Unicode name -# -# The entries are in Mac OS Roman code order. -# -# One of these mappings requires the use of a corporate character. -# See the file "CORPCHAR.TXT" and notes below. -# -# Control character mappings are not shown in this table, following -# the conventions of the standard UTC mapping tables. However, the -# Mac OS Roman character set uses the standard control characters at -# 0x00-0x1F and 0x7F. -# -# Notes on Mac OS Roman: -# ---------------------- +# /Notes on Mac OS Roman:/ # # This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa # environments, it is only supported directly in programming @@ -117,8 +38,7 @@ # interpreted as associated with these glyphs; they are usually # interpreted (if at all) as the control codes DC1-DC4. 
# -# Unicode mapping issues and notes: -# --------------------------------- +# /Unicode mapping issues and notes:/ # # The following corporate zone Unicode character is used in this # mapping: @@ -129,20 +49,17 @@ # is not authorized for use without permission of Apple, and # unauthorized use might constitute trademark infringement. # -# Details of mapping changes in each version: -# ------------------------------------------- +# /Details of mapping changes in each version:/ # # Changes from version n08 to version b02: # -# - Encoding changed for Mac OS 8.5; change mapping of 0xDB from +# * Encoding changed for Mac OS 8.5; change mapping of 0xDB from # CURRENCY SIGN (U+00A4) to EURO SIGN (U+20AC). # # Changes from version n03 to version n04: # -# - Change mapping of 0xBD from U+2126 to its canonical +# * Change mapping of 0xBD from U+2126 to its canonical # decomposition, U+03A9. -# -################## 0x20 0x0020 # SPACE 0x21 0x0021 # EXCLAMATION MARK diff --git a/Data/Encoding/Preprocessor/Mapping.hs b/Data/Encoding/Preprocessor/Mapping.hs index c899b8f..736d772 100644 --- a/Data/Encoding/Preprocessor/Mapping.hs +++ b/Data/Encoding/Preprocessor/Mapping.hs @@ -18,16 +18,22 @@ data MappingType | JISMapping deriving (Eq,Ord,Show,Read) -readTranslation :: Int -> FilePath -> IO [(Integer,Maybe Char)] +readTranslation :: Int -> FilePath -> IO ([(Integer,Maybe Char)],[String]) readTranslation offset file = do - cont <- readFile file - return $ mapMaybe (\ln -> case drop offset ln of - [src] -> Just (src,Nothing) - [src,trg] -> Just (src,Just $ chr $ fromIntegral trg) - _ -> Nothing) (parseTranslationTable cont) + cont <- fmap parseTranslationTable $ readFile file + let docstr = mapMaybe snd (takeWhile (null.fst) cont) + let trans = mapMaybe (\(ln,comm) -> case drop offset ln of + [src] -> Just (src,Nothing) + [src,trg] -> Just (src,Just $ chr $ fromIntegral trg) + _ -> Nothing) cont + return (trans,docstr) -parseTranslationTable :: String -> [[Integer]] 
-parseTranslationTable cont = filter (not.null) (map (\ln -> map read (takeWhile ((/='#').head) (words ln))) (lines cont)) +parseTranslationTable :: String -> [([Integer],Maybe String)] +parseTranslationTable cont = map (\ln -> let (trans,comm) = break (=='#') ln + in (map read (words trans),case comm of + "" -> Nothing + _ -> Just (tail comm)) + ) (lines cont) {-fillTranslations :: (Ix a,Show a) => a -> a -> [(a,Maybe Char)] -> [(a,Maybe Char)] fillTranslations f t = merge (range (f,t)) @@ -70,7 +76,7 @@ mappingPreprocessor = PreProcessor preprocessMapping :: MappingType -> FilePath -> FilePath -> [String] -> String -> IO () preprocessMapping tp src trg mods name = do - trans <- readTranslation 0 src + (trans,doc) <- readTranslation 0 src let mod = concat $ intersperse "." (mods++[name]) let wsize = case tp of ISOMapping -> 1 @@ -106,7 +112,11 @@ preprocessMapping tp src trg mods name = do writeFile trg $ unlines $ ["{- This file has been auto-generated. Do not edit it. -}" ,"{-# LANGUAGE MagicHash,DeriveDataTypeable #-}" - ,"module "++mod++"("++name++"(..)) where" + ]++(case doc of + [] -> [] + _ -> ("{- | "++head doc):(map (\ln -> " "++ln) (tail doc)) ++ [" -}"]) + ++ + ["module "++mod++"("++name++"(..)) where" ,"" ,"import Data.Encoding.Base" ,"import Data.Encoding.ByteSource"