Allow for documentation on mapping files
Ignore-this: 5bcfdd21c16464e29fb7498dd6ec8915 Use the first commented lines in each mapping file as the docstring for the whole module. darcs-hash:20090829125423-a4fee-c99efd2cccdaa24d3b283aca0d43cd7ff6970c7f
This commit is contained in:
parent
56867f5768
commit
b12649a071
@ -1,21 +1,5 @@
|
||||
#
|
||||
# Name: cp1250 to Unicode table
|
||||
# Unicode version: 2.0
|
||||
# Table version: 2.01
|
||||
# Table format: Format A
|
||||
# Date: 04/15/98
|
||||
#
|
||||
# Contact: Shawn.Steele@microsoft.com
|
||||
#
|
||||
# General notes: none
|
||||
#
|
||||
# Format: Three tab-separated columns
|
||||
# Column #1 is the cp1250 code (in hex)
|
||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
||||
# Column #3 is the Unicode name (follows a comment sign, '#')
|
||||
#
|
||||
# The entries are in cp1250 order
|
||||
#
|
||||
#Implements the Windows-1250 encoding.
|
||||
#For details, refer to <http://en.wikipedia.org/wiki/Windows-1250>.
|
||||
0x00 0x0000 #NULL
|
||||
0x01 0x0001 #START OF HEADING
|
||||
0x02 0x0002 #START OF TEXT
|
||||
|
||||
@ -1,21 +1,3 @@
|
||||
#
|
||||
# Name: cp1251 to Unicode table
|
||||
# Unicode version: 2.0
|
||||
# Table version: 2.01
|
||||
# Table format: Format A
|
||||
# Date: 04/15/98
|
||||
#
|
||||
# Contact: Shawn.Steele@microsoft.com
|
||||
#
|
||||
# General notes: none
|
||||
#
|
||||
# Format: Three tab-separated columns
|
||||
# Column #1 is the cp1251 code (in hex)
|
||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
||||
# Column #3 is the Unicode name (follows a comment sign, '#')
|
||||
#
|
||||
# The entries are in cp1251 order
|
||||
#
|
||||
0x00 0x0000 #NULL
|
||||
0x01 0x0001 #START OF HEADING
|
||||
0x02 0x0002 #START OF TEXT
|
||||
|
||||
@ -1,21 +1,3 @@
|
||||
#
|
||||
# Name: cp1252 to Unicode table
|
||||
# Unicode version: 2.0
|
||||
# Table version: 2.01
|
||||
# Table format: Format A
|
||||
# Date: 04/15/98
|
||||
#
|
||||
# Contact: Shawn.Steele@microsoft.com
|
||||
#
|
||||
# General notes: none
|
||||
#
|
||||
# Format: Three tab-separated columns
|
||||
# Column #1 is the cp1252 code (in hex)
|
||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
||||
# Column #3 is the Unicode name (follows a comment sign, '#')
|
||||
#
|
||||
# The entries are in cp1252 order
|
||||
#
|
||||
0x00 0x0000 #NULL
|
||||
0x01 0x0001 #START OF HEADING
|
||||
0x02 0x0002 #START OF TEXT
|
||||
|
||||
@ -1,21 +1,3 @@
|
||||
#
|
||||
# Name: cp1253 to Unicode table
|
||||
# Unicode version: 2.0
|
||||
# Table version: 2.01
|
||||
# Table format: Format A
|
||||
# Date: 04/15/98
|
||||
#
|
||||
# Contact: Shawn.Steele@microsoft.com
|
||||
#
|
||||
# General notes: none
|
||||
#
|
||||
# Format: Three tab-separated columns
|
||||
# Column #1 is the cp1253 code (in hex)
|
||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
||||
# Column #3 is the Unicode name (follows a comment sign, '#')
|
||||
#
|
||||
# The entries are in cp1253 order
|
||||
#
|
||||
0x00 0x0000 #NULL
|
||||
0x01 0x0001 #START OF HEADING
|
||||
0x02 0x0002 #START OF TEXT
|
||||
|
||||
@ -1,21 +1,3 @@
|
||||
#
|
||||
# Name: cp1254 to Unicode table
|
||||
# Unicode version: 2.0
|
||||
# Table version: 2.01
|
||||
# Table format: Format A
|
||||
# Date: 04/15/98
|
||||
#
|
||||
# Contact: Shawn.Steele@microsoft.com
|
||||
#
|
||||
# General notes: none
|
||||
#
|
||||
# Format: Three tab-separated columns
|
||||
# Column #1 is the cp1254 code (in hex)
|
||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
||||
# Column #3 is the Unicode name (follows a comment sign, '#')
|
||||
#
|
||||
# The entries are in cp1254 order
|
||||
#
|
||||
0x00 0x0000 #NULL
|
||||
0x01 0x0001 #START OF HEADING
|
||||
0x02 0x0002 #START OF TEXT
|
||||
|
||||
@ -1,21 +1,3 @@
|
||||
#
|
||||
# Name: cp1255 to Unicode table
|
||||
# Unicode version: 2.0
|
||||
# Table version: 2.01
|
||||
# Table format: Format A
|
||||
# Date: 1/7/2000
|
||||
#
|
||||
# Contact: Shawn.Steele@microsoft.com
|
||||
#
|
||||
# General notes: none
|
||||
#
|
||||
# Format: Three tab-separated columns
|
||||
# Column #1 is the cp1255 code (in hex)
|
||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
||||
# Column #3 is the Unicode name (follows a comment sign, '#')
|
||||
#
|
||||
# The entries are in cp1255 order
|
||||
#
|
||||
0x00 0x0000 #NULL
|
||||
0x01 0x0001 #START OF HEADING
|
||||
0x02 0x0002 #START OF TEXT
|
||||
|
||||
@ -1,21 +1,3 @@
|
||||
#
|
||||
# Name: cp1256 to Unicode table
|
||||
# Unicode version: 2.1
|
||||
# Table version: 2.01
|
||||
# Table format: Format A
|
||||
# Date: 01/5/99
|
||||
#
|
||||
# Contact: Shawn.Steele@microsoft.com
|
||||
#
|
||||
# General notes: none
|
||||
#
|
||||
# Format: Three tab-separated columns
|
||||
# Column #1 is the cp1256 code (in hex)
|
||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
||||
# Column #3 is the Unicode name (follows a comment sign, '#')
|
||||
#
|
||||
# The entries are in cp1256 order
|
||||
#
|
||||
0x00 0x0000 #NULL
|
||||
0x01 0x0001 #START OF HEADING
|
||||
0x02 0x0002 #START OF TEXT
|
||||
|
||||
@ -1,21 +1,3 @@
|
||||
#
|
||||
# Name: cp1257 to Unicode table
|
||||
# Unicode version: 2.0
|
||||
# Table version: 2.01
|
||||
# Table format: Format A
|
||||
# Date: 04/15/98
|
||||
#
|
||||
# Contact: Shawn.Steele@microsoft.com
|
||||
#
|
||||
# General notes: none
|
||||
#
|
||||
# Format: Three tab-separated columns
|
||||
# Column #1 is the cp1257 code (in hex)
|
||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
||||
# Column #3 is the Unicode name (follows a comment sign, '#')
|
||||
#
|
||||
# The entries are in cp1257 order
|
||||
#
|
||||
0x00 0x0000 #NULL
|
||||
0x01 0x0001 #START OF HEADING
|
||||
0x02 0x0002 #START OF TEXT
|
||||
|
||||
@ -1,21 +1,3 @@
|
||||
#
|
||||
# Name: cp1258 to Unicode table
|
||||
# Unicode version: 2.0
|
||||
# Table version: 2.01
|
||||
# Table format: Format A
|
||||
# Date: 04/15/98
|
||||
#
|
||||
# Contact: Shawn.Steele@microsoft.com
|
||||
#
|
||||
# General notes: none
|
||||
#
|
||||
# Format: Three tab-separated columns
|
||||
# Column #1 is the cp1258 code (in hex)
|
||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
||||
# Column #3 is the Unicode name (follows a comment sign, '#')
|
||||
#
|
||||
# The entries are in cp1258 order
|
||||
#
|
||||
0x00 0x0000 #NULL
|
||||
0x01 0x0001 #START OF HEADING
|
||||
0x02 0x0002 #START OF TEXT
|
||||
|
||||
@ -1,20 +1,3 @@
|
||||
#
|
||||
# Name: cp437_DOSLatinUS to Unicode table
|
||||
# Unicode version: 2.0
|
||||
# Table version: 2.00
|
||||
# Table format: Format A
|
||||
# Date: 04/24/96
|
||||
# Contact: Shawn.Steele@microsoft.com
|
||||
#
|
||||
# General notes: none
|
||||
#
|
||||
# Format: Three tab-separated columns
|
||||
# Column #1 is the cp437_DOSLatinUS code (in hex)
|
||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
||||
# Column #3 is the Unicode name (follows a comment sign, '#')
|
||||
#
|
||||
# The entries are in cp437_DOSLatinUS order
|
||||
#
|
||||
0x00 0x0000 #NULL
|
||||
0x01 0x0001 #START OF HEADING
|
||||
0x02 0x0002 #START OF TEXT
|
||||
|
||||
@ -1,20 +1,3 @@
|
||||
#
|
||||
# Name: cp737_DOSGreek to Unicode table
|
||||
# Unicode version: 2.0
|
||||
# Table version: 2.00
|
||||
# Table format: Format A
|
||||
# Date: 04/24/96
|
||||
# Contact: Shawn.Steele@microsoft.com
|
||||
#
|
||||
# General notes: none
|
||||
#
|
||||
# Format: Three tab-separated columns
|
||||
# Column #1 is the cp737_DOSGreek code (in hex)
|
||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
||||
# Column #3 is the Unicode name (follows a comment sign, '#')
|
||||
#
|
||||
# The entries are in cp737_DOSGreek order
|
||||
#
|
||||
0x00 0x0000 #NULL
|
||||
0x01 0x0001 #START OF HEADING
|
||||
0x02 0x0002 #START OF TEXT
|
||||
|
||||
@ -1,20 +1,3 @@
|
||||
#
|
||||
# Name: cp775_DOSBaltRim to Unicode table
|
||||
# Unicode version: 2.0
|
||||
# Table version: 2.00
|
||||
# Table format: Format A
|
||||
# Date: 04/24/96
|
||||
# Contact: Shawn.Steele@microsoft.com
|
||||
#
|
||||
# General notes: none
|
||||
#
|
||||
# Format: Three tab-separated columns
|
||||
# Column #1 is the cp775_DOSBaltRim code (in hex)
|
||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
||||
# Column #3 is the Unicode name (follows a comment sign, '#')
|
||||
#
|
||||
# The entries are in cp775_DOSBaltRim order
|
||||
#
|
||||
0x00 0x0000 #NULL
|
||||
0x01 0x0001 #START OF HEADING
|
||||
0x02 0x0002 #START OF TEXT
|
||||
|
||||
@ -1,20 +1,3 @@
|
||||
#
|
||||
# Name: cp850_DOSLatin1 to Unicode table
|
||||
# Unicode version: 2.0
|
||||
# Table version: 2.00
|
||||
# Table format: Format A
|
||||
# Date: 04/24/96
|
||||
# Contact: Shawn.Steele@microsoft.com
|
||||
#
|
||||
# General notes: none
|
||||
#
|
||||
# Format: Three tab-separated columns
|
||||
# Column #1 is the cp850_DOSLatin1 code (in hex)
|
||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
||||
# Column #3 is the Unicode name (follows a comment sign, '#')
|
||||
#
|
||||
# The entries are in cp850_DOSLatin1 order
|
||||
#
|
||||
0x00 0x0000 #NULL
|
||||
0x01 0x0001 #START OF HEADING
|
||||
0x02 0x0002 #START OF TEXT
|
||||
|
||||
@ -1,20 +1,3 @@
|
||||
#
|
||||
# Name: cp852_DOSLatin2 to Unicode table
|
||||
# Unicode version: 2.0
|
||||
# Table version: 2.00
|
||||
# Table format: Format A
|
||||
# Date: 04/24/96
|
||||
# Contact: Shawn.Steele@microsoft.com
|
||||
#
|
||||
# General notes: none
|
||||
#
|
||||
# Format: Three tab-separated columns
|
||||
# Column #1 is the cp852_DOSLatin2 code (in hex)
|
||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
||||
# Column #3 is the Unicode name (follows a comment sign, '#')
|
||||
#
|
||||
# The entries are in cp852_DOSLatin2 order
|
||||
#
|
||||
0x00 0x0000 #NULL
|
||||
0x01 0x0001 #START OF HEADING
|
||||
0x02 0x0002 #START OF TEXT
|
||||
|
||||
@ -1,20 +1,3 @@
|
||||
#
|
||||
# Name: cp855_DOSCyrillic to Unicode table
|
||||
# Unicode version: 2.0
|
||||
# Table version: 2.00
|
||||
# Table format: Format A
|
||||
# Date: 04/24/96
|
||||
# Contact: Shawn.Steele@microsoft.com
|
||||
#
|
||||
# General notes: none
|
||||
#
|
||||
# Format: Three tab-separated columns
|
||||
# Column #1 is the cp855_DOSCyrillic code (in hex)
|
||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
||||
# Column #3 is the Unicode name (follows a comment sign, '#')
|
||||
#
|
||||
# The entries are in cp855_DOSCyrillic order
|
||||
#
|
||||
0x00 0x0000 #NULL
|
||||
0x01 0x0001 #START OF HEADING
|
||||
0x02 0x0002 #START OF TEXT
|
||||
|
||||
@ -1,20 +1,3 @@
|
||||
#
|
||||
# Name: cp857_DOSTurkish to Unicode table
|
||||
# Unicode version: 2.0
|
||||
# Table version: 2.00
|
||||
# Table format: Format A
|
||||
# Date: 04/24/96
|
||||
# Contact: Shawn.Steele@microsoft.com
|
||||
#
|
||||
# General notes: none
|
||||
#
|
||||
# Format: Three tab-separated columns
|
||||
# Column #1 is the cp857_DOSTurkish code (in hex)
|
||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
||||
# Column #3 is the Unicode name (follows a comment sign, '#')
|
||||
#
|
||||
# The entries are in cp857_DOSTurkish order
|
||||
#
|
||||
0x00 0x0000 #NULL
|
||||
0x01 0x0001 #START OF HEADING
|
||||
0x02 0x0002 #START OF TEXT
|
||||
|
||||
@ -1,20 +1,3 @@
|
||||
#
|
||||
# Name: cp860_DOSPortuguese to Unicode table
|
||||
# Unicode version: 2.0
|
||||
# Table version: 2.00
|
||||
# Table format: Format A
|
||||
# Date: 04/24/96
|
||||
# Contact: Shawn.Steele@microsoft.com
|
||||
#
|
||||
# General notes: none
|
||||
#
|
||||
# Format: Three tab-separated columns
|
||||
# Column #1 is the cp860_DOSPortuguese code (in hex)
|
||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
||||
# Column #3 is the Unicode name (follows a comment sign, '#')
|
||||
#
|
||||
# The entries are in cp860_DOSPortuguese order
|
||||
#
|
||||
0x00 0x0000 #NULL
|
||||
0x01 0x0001 #START OF HEADING
|
||||
0x02 0x0002 #START OF TEXT
|
||||
|
||||
@ -1,20 +1,3 @@
|
||||
#
|
||||
# Name: cp861_DOSIcelandic to Unicode table
|
||||
# Unicode version: 2.0
|
||||
# Table version: 2.00
|
||||
# Table format: Format A
|
||||
# Date: 04/24/96
|
||||
# Contact: Shawn.Steele@microsoft.com
|
||||
#
|
||||
# General notes: none
|
||||
#
|
||||
# Format: Three tab-separated columns
|
||||
# Column #1 is the cp861_DOSIcelandic code (in hex)
|
||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
||||
# Column #3 is the Unicode name (follows a comment sign, '#')
|
||||
#
|
||||
# The entries are in cp861_DOSIcelandic order
|
||||
#
|
||||
0x00 0x0000 #NULL
|
||||
0x01 0x0001 #START OF HEADING
|
||||
0x02 0x0002 #START OF TEXT
|
||||
|
||||
@ -1,20 +1,3 @@
|
||||
#
|
||||
# Name: cp862_DOSHebrew to Unicode table
|
||||
# Unicode version: 2.0
|
||||
# Table version: 2.00
|
||||
# Table format: Format A
|
||||
# Date: 04/24/96
|
||||
# Contact: Shawn.Steele@microsoft.com
|
||||
#
|
||||
# General notes: none
|
||||
#
|
||||
# Format: Three tab-separated columns
|
||||
# Column #1 is the cp862_DOSHebrew code (in hex)
|
||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
||||
# Column #3 is the Unicode name (follows a comment sign, '#')
|
||||
#
|
||||
# The entries are in cp862_DOSHebrew order
|
||||
#
|
||||
0x00 0x0000 #NULL
|
||||
0x01 0x0001 #START OF HEADING
|
||||
0x02 0x0002 #START OF TEXT
|
||||
|
||||
@ -1,20 +1,3 @@
|
||||
#
|
||||
# Name: cp863_DOSCanadaF to Unicode table
|
||||
# Unicode version: 2.0
|
||||
# Table version: 2.00
|
||||
# Table format: Format A
|
||||
# Date: 04/24/96
|
||||
# Contact: Shawn.Steele@microsoft.com
|
||||
#
|
||||
# General notes: none
|
||||
#
|
||||
# Format: Three tab-separated columns
|
||||
# Column #1 is the cp863_DOSCanadaF code (in hex)
|
||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
||||
# Column #3 is the Unicode name (follows a comment sign, '#')
|
||||
#
|
||||
# The entries are in cp863_DOSCanadaF order
|
||||
#
|
||||
0x00 0x0000 #NULL
|
||||
0x01 0x0001 #START OF HEADING
|
||||
0x02 0x0002 #START OF TEXT
|
||||
|
||||
@ -1,20 +1,3 @@
|
||||
#
|
||||
# Name: cp864_DOSArabic to Unicode table
|
||||
# Unicode version: 2.0
|
||||
# Table version: 2.00
|
||||
# Table format: Format A
|
||||
# Date: 04/24/96
|
||||
# Contact: Shawn.Steele@microsoft.com
|
||||
#
|
||||
# General notes: none
|
||||
#
|
||||
# Format: Three tab-separated columns
|
||||
# Column #1 is the cp864_DOSArabic code (in hex)
|
||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
||||
# Column #3 is the Unicode name (follows a comment sign, '#')
|
||||
#
|
||||
# The entries are in cp864_DOSArabic order
|
||||
#
|
||||
0x00 0x0000 #NULL
|
||||
0x01 0x0001 #START OF HEADING
|
||||
0x02 0x0002 #START OF TEXT
|
||||
|
||||
@ -1,20 +1,3 @@
|
||||
#
|
||||
# Name: cp865_DOSNordic to Unicode table
|
||||
# Unicode version: 2.0
|
||||
# Table version: 2.00
|
||||
# Table format: Format A
|
||||
# Date: 04/24/96
|
||||
# Contact: Shawn.Steele@microsoft.com
|
||||
#
|
||||
# General notes: none
|
||||
#
|
||||
# Format: Three tab-separated columns
|
||||
# Column #1 is the cp865_DOSNordic code (in hex)
|
||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
||||
# Column #3 is the Unicode name (follows a comment sign, '#')
|
||||
#
|
||||
# The entries are in cp865_DOSNordic order
|
||||
#
|
||||
0x00 0x0000 #NULL
|
||||
0x01 0x0001 #START OF HEADING
|
||||
0x02 0x0002 #START OF TEXT
|
||||
|
||||
@ -1,20 +1,3 @@
|
||||
#
|
||||
# Name: cp866_DOSCyrillicRussian to Unicode table
|
||||
# Unicode version: 2.0
|
||||
# Table version: 2.00
|
||||
# Table format: Format A
|
||||
# Date: 04/24/96
|
||||
# Contact: Shawn.Steele@microsoft.com
|
||||
#
|
||||
# General notes: none
|
||||
#
|
||||
# Format: Three tab-separated columns
|
||||
# Column #1 is the cp866_DOSCyrillicRussian code (in hex)
|
||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
||||
# Column #3 is the Unicode name (follows a comment sign, '#')
|
||||
#
|
||||
# The entries are in cp866_DOSCyrillicRussian order
|
||||
#
|
||||
0x00 0x0000 #NULL
|
||||
0x01 0x0001 #START OF HEADING
|
||||
0x02 0x0002 #START OF TEXT
|
||||
|
||||
@ -1,20 +1,3 @@
|
||||
#
|
||||
# Name: cp869_DOSGreek2 to Unicode table
|
||||
# Unicode version: 2.0
|
||||
# Table version: 2.00
|
||||
# Table format: Format A
|
||||
# Date: 04/24/96
|
||||
# Contact: Shawn.Steele@microsoft.com
|
||||
#
|
||||
# General notes: none
|
||||
#
|
||||
# Format: Three tab-separated columns
|
||||
# Column #1 is the cp869_DOSGreek2 code (in hex)
|
||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
||||
# Column #3 is the Unicode name (follows a comment sign, '#')
|
||||
#
|
||||
# The entries are in cp869_DOSGreek2 order
|
||||
#
|
||||
0x00 0x0000 #NULL
|
||||
0x01 0x0001 #START OF HEADING
|
||||
0x02 0x0002 #START OF TEXT
|
||||
|
||||
@ -1,21 +1,3 @@
|
||||
#
|
||||
# Name: cp874 to Unicode table
|
||||
# Unicode version: 2.0
|
||||
# Table version: 2.00
|
||||
# Table format: Format A
|
||||
# Date: 04/15/98
|
||||
#
|
||||
# Contact: Shawn.Steele@microsoft.com
|
||||
#
|
||||
# General notes: none
|
||||
#
|
||||
# Format: Three tab-separated columns
|
||||
# Column #1 is the cp874 code (in hex)
|
||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
||||
# Column #3 is the Unicode name (follows a comment sign, '#')
|
||||
#
|
||||
# The entries are in cp874 order
|
||||
#
|
||||
0x00 0x0000 #NULL
|
||||
0x01 0x0001 #START OF HEADING
|
||||
0x02 0x0002 #START OF TEXT
|
||||
|
||||
@ -1,53 +1,3 @@
|
||||
#
|
||||
# Name: JIS X 0201 (1976) to Unicode 1.1 Table
|
||||
# Unicode version: 1.1
|
||||
# Table version: 0.9
|
||||
# Table format: Format A
|
||||
# Date: 8 March 1994
|
||||
#
|
||||
# Copyright (c) 1991-1994 Unicode, Inc. All Rights reserved.
|
||||
#
|
||||
# This file is provided as-is by Unicode, Inc. (The Unicode Consortium).
|
||||
# No claims are made as to fitness for any particular purpose. No
|
||||
# warranties of any kind are expressed or implied. The recipient
|
||||
# agrees to determine applicability of information provided. If this
|
||||
# file has been provided on magnetic media by Unicode, Inc., the sole
|
||||
# remedy for any claim will be exchange of defective media within 90
|
||||
# days of receipt.
|
||||
#
|
||||
# Recipient is granted the right to make copies in any form for
|
||||
# internal distribution and to freely use the information supplied
|
||||
# in the creation of products supporting Unicode. Unicode, Inc.
|
||||
# specifically excludes the right to re-distribute this file directly
|
||||
# to third parties or other organizations whether for profit or not.
|
||||
#
|
||||
# General notes:
|
||||
#
|
||||
#
|
||||
# This table contains one set of mappings from JIS X 0201 into Unicode.
|
||||
# Note that these data are *possible* mappings only and may not be the
|
||||
# same as those used by actual products, nor may they be the best suited
|
||||
# for all uses. For more information on the mappings between various code
|
||||
# pages incorporating the repertoire of JIS X 0201 and Unicode, consult the
|
||||
# VENDORS mapping data. Normative information on the mapping between
|
||||
# JIS X 0201 and Unicode may be found in the Unihan.txt file in the
|
||||
# latest Unicode Character Database.
|
||||
#
|
||||
# If you have carefully considered the fact that the mappings in
|
||||
# this table are only one possible set of mappings between JIS X 0201 and
|
||||
# Unicode and have no normative status, but still feel that you
|
||||
# have located an error in the table that requires fixing, you may
|
||||
# report any such error to errata@unicode.org.
|
||||
#
|
||||
#
|
||||
# Format: Three tab-separated columns
|
||||
# Column #1 is the shift JIS code (in hex as 0xXX)
|
||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
||||
# Column #3 is the Unicode (ISO 10646) name (follows a comment sign)
|
||||
#
|
||||
# The entries are in JIS order
|
||||
#
|
||||
#
|
||||
0x20 0x0020 # SPACE
|
||||
0x21 0x0021 # EXCLAMATION MARK
|
||||
0x22 0x0022 # QUOTATION MARK
|
||||
|
||||
@ -1,75 +1,3 @@
|
||||
#
|
||||
# Name: JIS X 0208 (1990) to Unicode
|
||||
# Unicode version: 1.1
|
||||
# Table version: 0.9
|
||||
# Table format: Format A
|
||||
# Date: 8 March 1994
|
||||
#
|
||||
# Copyright (c) 1991-1994 Unicode, Inc. All Rights reserved.
|
||||
#
|
||||
# This file is provided as-is by Unicode, Inc. (The Unicode Consortium).
|
||||
# No claims are made as to fitness for any particular purpose. No
|
||||
# warranties of any kind are expressed or implied. The recipient
|
||||
# agrees to determine applicability of information provided. If this
|
||||
# file has been provided on magnetic media by Unicode, Inc., the sole
|
||||
# remedy for any claim will be exchange of defective media within 90
|
||||
# days of receipt.
|
||||
#
|
||||
# Recipient is granted the right to make copies in any form for
|
||||
# internal distribution and to freely use the information supplied
|
||||
# in the creation of products supporting Unicode. Unicode, Inc.
|
||||
# specifically excludes the right to re-distribute this file directly
|
||||
# to third parties or other organizations whether for profit or not.
|
||||
#
|
||||
# General notes:
|
||||
#
|
||||
#
|
||||
# This table contains one set of mappings from JIS X 0208 (1990) into Unicode.
|
||||
# Note that these data are *possible* mappings only and may not be the
|
||||
# same as those used by actual products, nor may they be the best suited
|
||||
# for all uses. For more information on the mappings between various code
|
||||
# pages incorporating the repertoire of JIS X 0208 (1990) and Unicode, consult the
|
||||
# VENDORS mapping data. Normative information on the mapping between
|
||||
# JIS X 0208 (1990) and Unicode may be found in the Unihan.txt file in the
|
||||
# latest Unicode Character Database.
|
||||
#
|
||||
# If you have carefully considered the fact that the mappings in
|
||||
# this table are only one possible set of mappings between JIS X 0208 (1990)
|
||||
# and Unicode and have no normative status, but still feel that you
|
||||
# have located an error in the table that requires fixing, you may
|
||||
# report any such error to errata@unicode.org.
|
||||
#
|
||||
#
|
||||
# Format: Three tab-separated columns
|
||||
# Column #1 is the JIS X 0208 code (in hex as 0xXXXX)
|
||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
||||
# Column #3 is the Unicode name (follows a comment sign, '#')
|
||||
# The official name for each of the Unicode characters U+4E00
|
||||
# to U+9FA5, inclusive, is "CJK UNIFIED IDEOGRAPH-XXXX",
|
||||
# where XXXX is the code point. Including all these
|
||||
# names in this file increases its size substantially
|
||||
# and needlessly. The token "<CJK>" is used for the
|
||||
# name of these characters. If necessary, it can be
|
||||
# expanded algorithmically by a parser or editor.
|
||||
#
|
||||
# The entries are in JIS X 0208 order
|
||||
#
|
||||
# The following algorithms can be used to change the hex form
|
||||
# of JIS 0208 to other standard forms:
|
||||
#
|
||||
# To change hex to EUC form, add 0x8080
|
||||
# To change hex to kuten form, first subtract 0x2020. Then
|
||||
# the high and low bytes correspond to the ku and ten of
|
||||
# the kuten form. For example, 0x2121 -> 0x0101 -> 0101;
|
||||
# 0x7426 -> 0x5406 -> 8406
|
||||
#
|
||||
# The kanji mappings are a normative part of ISO/IEC 10646. The
|
||||
# non-kanji mappings are provisional, pending definition of
|
||||
# official mappings by Japanese standards bodies
|
||||
#
|
||||
# Any comments or problems, contact <John_Jenkins@taligent.com>
|
||||
#
|
||||
#
|
||||
0x2121 0x3000 # IDEOGRAPHIC SPACE
|
||||
0x2122 0x3001 # IDEOGRAPHIC COMMA
|
||||
0x2123 0x3002 # IDEOGRAPHIC FULL STOP
|
||||
|
||||
@ -1,89 +1,3 @@
|
||||
#
|
||||
# Name: JIS X 0212 (1990) to Unicode
|
||||
# Unicode version: 1.1
|
||||
# Table version: 0.9
|
||||
# Table format: Format A
|
||||
# Date: 8 March 1994
|
||||
#
|
||||
# Copyright (c) 1991-1994 Unicode, Inc. All Rights reserved.
|
||||
#
|
||||
# This file is provided as-is by Unicode, Inc. (The Unicode Consortium).
|
||||
# No claims are made as to fitness for any particular purpose. No
|
||||
# warranties of any kind are expressed or implied. The recipient
|
||||
# agrees to determine applicability of information provided. If this
|
||||
# file has been provided on magnetic media by Unicode, Inc., the sole
|
||||
# remedy for any claim will be exchange of defective media within 90
|
||||
# days of receipt.
|
||||
#
|
||||
# Recipient is granted the right to make copies in any form for
|
||||
# internal distribution and to freely use the information supplied
|
||||
# in the creation of products supporting Unicode. Unicode, Inc.
|
||||
# specifically excludes the right to re-distribute this file directly
|
||||
# to third parties or other organizations whether for profit or not.
|
||||
#
|
||||
# General notes:
|
||||
#
|
||||
#
|
||||
# This table contains one set of mappings from JIS X 0212 into Unicode.
|
||||
# Note that these data are *possible* mappings only and may not be the
|
||||
# same as those used by actual products, nor may they be the best suited
|
||||
# for all uses. For more information on the mappings between various code
|
||||
# pages incorporating the repertoire of JIS X 0212 and Unicode, consult the
|
||||
# VENDORS mapping data. Normative information on the mapping between
|
||||
# JIS X 0212 and Unicode may be found in the Unihan.txt file in the
|
||||
# latest Unicode Character Database.
|
||||
#
|
||||
# If you have carefully considered the fact that the mappings in
|
||||
# this table are only one possible set of mappings between JIS X 0212 and
|
||||
# Unicode and have no normative status, but still feel that you
|
||||
# have located an error in the table that requires fixing, you may
|
||||
# report any such error to errata@unicode.org.
|
||||
#
|
||||
#
|
||||
# Format: Three tab-separated columns
|
||||
# Column #1 is the JIS X 0212 code (in hex as 0xXXXX)
|
||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
||||
# Column #3 is the Unicode name (follows a comment sign, '#')
|
||||
# The official name for each of the Unicode characters U+4E00
|
||||
# to U+9FA5, inclusive, is "CJK UNIFIED IDEOGRAPH-XXXX",
|
||||
# where XXXX is the code point. Including all these
|
||||
# names in this file increases its size substantially
|
||||
# and needlessly. The token "<CJK>" is used for the
|
||||
# name of these characters. If necessary, it can be
|
||||
# expanded algorithmically by a parser or editor.
|
||||
#
|
||||
# The entries are in JIS X 0212 order
|
||||
#
|
||||
# The following algorithms can be used to change the hex form
|
||||
# of JIS 0212 to other standard forms:
|
||||
#
|
||||
# To change hex to EUC form, add 0x8080
|
||||
# To change hex to kuten form, first subtract 0x2020. Then
|
||||
# the high and low bytes correspond to the ku and ten of
|
||||
# the kuten form. For example, 0x2121 -> 0x0101 -> 0101;
|
||||
# 0x6D63 -> 0x4D43 -> 7767
|
||||
#
|
||||
# The kanji mappings are a normative part of ISO/IEC 10646. The
|
||||
# non-kanji mappings are provisional, pending definition of
|
||||
# official mappings by Japanese standards bodies
|
||||
#
|
||||
# Any comments or problems, contact <John_Jenkins@taligent.com>
|
||||
#
|
||||
# Notes:
|
||||
#
|
||||
# 1. JIS X 0212 apparently unified the following two symbols
|
||||
# into a single character at 0x2922:
|
||||
#
|
||||
# LATIN CAPITAL LETTER D WITH STROKE
|
||||
# LATIN CAPITAL LETTER ETH
|
||||
#
|
||||
# However, JIS X 0212 maintains the distinction between
|
||||
# the lowercase forms of these two elements at 0x2942 and 0x2943.
|
||||
# Given the structure of these JIS encodings, it is clear that
|
||||
# 0x2922 and 0x2942 are intended to be a capital/small pair.
|
||||
# Consequently, in the Unicode mapping, 0x2922 is treated as
|
||||
# LATIN CAPITAL LETTER D WITH STROKE.
|
||||
#
|
||||
0x222F 0x02D8 # BREVE
|
||||
0x2230 0x02C7 # CARON (Mandarin Chinese third tone)
|
||||
0x2231 0x00B8 # CEDILLA
|
||||
|
||||
@ -1,83 +1,4 @@
|
||||
#=======================================================================
|
||||
# File name: ROMAN.TXT
|
||||
#
|
||||
# Contents: Map (external version) from Mac OS Roman
|
||||
# character set to Unicode 2.1 and later.
|
||||
#
|
||||
# Copyright: (c) 1994-2002, 2005 by Apple Computer, Inc., all rights
|
||||
# reserved.
|
||||
#
|
||||
# Contact: charsets@apple.com
|
||||
#
|
||||
# Changes:
|
||||
#
|
||||
# c02 2005-Apr-05 Update header comments. Matches internal xml
|
||||
# <c1.1> and Text Encoding Converter 2.0.
|
||||
# b4,c1 2002-Dec-19 Update URLs, notes. Matches internal
|
||||
# utom<b5>.
|
||||
# b03 1999-Sep-22 Update contact e-mail address. Matches
|
||||
# internal utom<b4>, ufrm<b3>, and Text
|
||||
# Encoding Converter version 1.5.
|
||||
# b02 1998-Aug-18 Encoding changed for Mac OS 8.5; change
|
||||
# mapping of 0xDB from CURRENCY SIGN to
|
||||
# EURO SIGN. Matches internal utom<b3>,
|
||||
# ufrm<b3>.
|
||||
# n08 1998-Feb-05 Minor update to header comments
|
||||
# n06 1997-Dec-14 Add warning about future changes to 0xDB
|
||||
# from CURRENCY SIGN to EURO SIGN. Clarify
|
||||
# some header information
|
||||
# n04 1997-Dec-01 Update to match internal utom<n3>, ufrm<n22>:
|
||||
# Change standard mapping for 0xBD from U+2126
|
||||
# to its canonical decomposition, U+03A9.
|
||||
# n03 1995-Apr-15 First version (after fixing some typos).
|
||||
# Matches internal ufrm<n9>.
|
||||
#
|
||||
# Standard header:
|
||||
# ----------------
|
||||
#
|
||||
# Apple, the Apple logo, and Macintosh are trademarks of Apple
|
||||
# Computer, Inc., registered in the United States and other countries.
|
||||
# Unicode is a trademark of Unicode Inc. For the sake of brevity,
|
||||
# throughout this document, "Macintosh" can be used to refer to
|
||||
# Macintosh computers and "Unicode" can be used to refer to the
|
||||
# Unicode standard.
|
||||
#
|
||||
# Apple Computer, Inc. ("Apple") makes no warranty or representation,
|
||||
# either express or implied, with respect to this document and the
|
||||
# included data, its quality, accuracy, or fitness for a particular
|
||||
# purpose. In no event will Apple be liable for direct, indirect,
|
||||
# special, incidental, or consequential damages resulting from any
|
||||
# defect or inaccuracy in this document or the included data.
|
||||
#
|
||||
# These mapping tables and character lists are subject to change.
|
||||
# The latest tables should be available from the following:
|
||||
#
|
||||
# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/>
|
||||
#
|
||||
# For general information about Mac OS encodings and these mapping
|
||||
# tables, see the file "README.TXT".
|
||||
#
|
||||
# Format:
|
||||
# -------
|
||||
#
|
||||
# Three tab-separated columns;
|
||||
# '#' begins a comment which continues to the end of the line.
|
||||
# Column #1 is the Mac OS Roman code (in hex as 0xNN)
|
||||
# Column #2 is the corresponding Unicode (in hex as 0xNNNN)
|
||||
# Column #3 is a comment containing the Unicode name
|
||||
#
|
||||
# The entries are in Mac OS Roman code order.
|
||||
#
|
||||
# One of these mappings requires the use of a corporate character.
|
||||
# See the file "CORPCHAR.TXT" and notes below.
|
||||
#
|
||||
# Control character mappings are not shown in this table, following
|
||||
# the conventions of the standard UTC mapping tables. However, the
|
||||
# Mac OS Roman character set uses the standard control characters at
|
||||
# 0x00-0x1F and 0x7F.
|
||||
#
|
||||
# Notes on Mac OS Roman:
|
||||
# ----------------------
|
||||
# /Notes on Mac OS Roman:/
|
||||
#
|
||||
# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa
|
||||
# environments, it is only supported directly in programming
|
||||
@ -117,8 +38,7 @@
|
||||
# interpreted as associated with these glyphs; they are usually
|
||||
# interpreted (if at all) as the control codes DC1-DC4.
|
||||
#
|
||||
# Unicode mapping issues and notes:
|
||||
# ---------------------------------
|
||||
# /Unicode mapping issues and notes:/
|
||||
#
|
||||
# The following corporate zone Unicode character is used in this
|
||||
# mapping:
|
||||
@ -129,20 +49,17 @@
|
||||
# is not authorized for use without permission of Apple, and
|
||||
# unauthorized use might constitute trademark infringement.
|
||||
#
|
||||
# Details of mapping changes in each version:
|
||||
# -------------------------------------------
|
||||
# /Details of mapping changes in each version:/
|
||||
#
|
||||
# Changes from version n08 to version b02:
|
||||
#
|
||||
# - Encoding changed for Mac OS 8.5; change mapping of 0xDB from
|
||||
# * Encoding changed for Mac OS 8.5; change mapping of 0xDB from
|
||||
# CURRENCY SIGN (U+00A4) to EURO SIGN (U+20AC).
|
||||
#
|
||||
# Changes from version n03 to version n04:
|
||||
#
|
||||
# - Change mapping of 0xBD from U+2126 to its canonical
|
||||
# * Change mapping of 0xBD from U+2126 to its canonical
|
||||
# decomposition, U+03A9.
|
||||
#
|
||||
##################
|
||||
|
||||
0x20 0x0020 # SPACE
|
||||
0x21 0x0021 # EXCLAMATION MARK
|
||||
|
||||
@ -18,16 +18,22 @@ data MappingType
|
||||
| JISMapping
|
||||
deriving (Eq,Ord,Show,Read)
|
||||
|
||||
readTranslation :: Int -> FilePath -> IO [(Integer,Maybe Char)]
|
||||
readTranslation :: Int -> FilePath -> IO ([(Integer,Maybe Char)],[String])
|
||||
readTranslation offset file = do
|
||||
cont <- readFile file
|
||||
return $ mapMaybe (\ln -> case drop offset ln of
|
||||
[src] -> Just (src,Nothing)
|
||||
[src,trg] -> Just (src,Just $ chr $ fromIntegral trg)
|
||||
_ -> Nothing) (parseTranslationTable cont)
|
||||
cont <- fmap parseTranslationTable $ readFile file
|
||||
let docstr = mapMaybe snd (takeWhile (null.fst) cont)
|
||||
let trans = mapMaybe (\(ln,comm) -> case drop offset ln of
|
||||
[src] -> Just (src,Nothing)
|
||||
[src,trg] -> Just (src,Just $ chr $ fromIntegral trg)
|
||||
_ -> Nothing) cont
|
||||
return (trans,docstr)
|
||||
|
||||
parseTranslationTable :: String -> [[Integer]]
|
||||
parseTranslationTable cont = filter (not.null) (map (\ln -> map read (takeWhile ((/='#').head) (words ln))) (lines cont))
|
||||
parseTranslationTable :: String -> [([Integer],Maybe String)]
|
||||
parseTranslationTable cont = map (\ln -> let (trans,comm) = break (=='#') ln
|
||||
in (map read (words trans),case comm of
|
||||
"" -> Nothing
|
||||
_ -> Just (tail comm))
|
||||
) (lines cont)
|
||||
|
||||
{-fillTranslations :: (Ix a,Show a) => a -> a -> [(a,Maybe Char)] -> [(a,Maybe Char)]
|
||||
fillTranslations f t = merge (range (f,t))
|
||||
@ -70,7 +76,7 @@ mappingPreprocessor = PreProcessor
|
||||
|
||||
preprocessMapping :: MappingType -> FilePath -> FilePath -> [String] -> String -> IO ()
|
||||
preprocessMapping tp src trg mods name = do
|
||||
trans <- readTranslation 0 src
|
||||
(trans,doc) <- readTranslation 0 src
|
||||
let mod = concat $ intersperse "." (mods++[name])
|
||||
let wsize = case tp of
|
||||
ISOMapping -> 1
|
||||
@ -106,7 +112,11 @@ preprocessMapping tp src trg mods name = do
|
||||
writeFile trg $ unlines $
|
||||
["{- This file has been auto-generated. Do not edit it. -}"
|
||||
,"{-# LANGUAGE MagicHash,DeriveDataTypeable #-}"
|
||||
,"module "++mod++"("++name++"(..)) where"
|
||||
]++(case doc of
|
||||
[] -> []
|
||||
_ -> ("{- | "++head doc):(map (\ln -> " "++ln) (tail doc)) ++ [" -}"])
|
||||
++
|
||||
["module "++mod++"("++name++"(..)) where"
|
||||
,""
|
||||
,"import Data.Encoding.Base"
|
||||
,"import Data.Encoding.ByteSource"
|
||||
|
||||
Loading…
Reference in New Issue
Block a user