// WebCore/platform/UnicodeRange.cpp - WebKit - Git at Google

 /*
  * Copyright (C) 2007 Apple Computer, Inc.
  *
  * Portions are Copyright (C) 1998 Netscape Communications Corporation.
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
  * This library is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
  *
  * Alternatively, the contents of this file may be used under the terms
  * of either the Mozilla Public License Version 1.1, found at
  * http://www.mozilla.org/MPL/ (the "MPL") or the GNU General Public
  * License Version 2.0, found at http://www.fsf.org/copyleft/gpl.html
  * (the "GPL"), in which case the provisions of the MPL or the GPL are
  * applicable instead of those above.  If you wish to allow use of your
  * version of this file only under the terms of one of those two
  * licenses (the MPL or the GPL) and not to allow others to use your
  * version of this file under the LGPL, indicate your decision by
  * deletingthe provisions above and replace them with the notice and
  * other provisions required by the MPL or the GPL, as the case may be.
  * If you do not delete the provisions above, a recipient may use your
  * version of this file under any of the LGPL, the MPL or the GPL.
  */

 #include "config.h"
 #include "UnicodeRange.h"

 namespace WebCore {

 // Language group name for each script-specific unicode range.
 // The table is indexed directly by a unicode range value, so the order of
 // the entries must match the numbering of the range constants,
 // e.g. gUnicodeRangeToLangGroupTable[cRangeCyrillic] is "x-cyrillic".
 // Both the strings and the pointer slots are const: the only use of the
 // table visible in this file is a read, so nothing needs to write to it.
 static const char* const gUnicodeRangeToLangGroupTable[] =
 {
   "x-cyrillic",
   "el",
   "tr",
   "he",
   "ar",
   "x-baltic",
   "th",
   "ko",
   "ja",
   "zh-CN",
   "zh-TW",
   "x-devanagari",
   "x-tamil",
   "x-armn",
   "x-beng",
   "x-cans",
   "x-ethi",
   "x-geor",
   "x-gujr",
   "x-guru",
   "x-khmr",
   "x-mlym"
 };

 /**********************************************************************
  * Unicode subranges as defined in unicode 3.0
  * x-western, x-central-euro, tr, x-baltic  -> latin
  *  0000 - 036f
  *  1e00 - 1eff
  *  2000 - 206f  (general punctuation)
  *  20a0 - 20cf  (currency symbols)
  *  2100 - 214f  (letterlike symbols)
  *  2150 - 218f  (Number Forms)
  * el         -> greek
  *  0370 - 03ff
  *  1f00 - 1fff
  * x-cyrillic -> cyrillic
  *  0400 - 04ff
  * he         -> hebrew
  *  0590 - 05ff
  * ar         -> arabic
  *  0600 - 06ff
  *  fb50 - fdff (arabic presentation forms)
  *  fe70 - feff (arabic presentation forms b)
  * th - thai
  *  0e00 - 0e7f
  * ko        -> korean
  *  ac00 - d7af  (hangul Syllables)
  *  1100 - 11ff    (jamo)
  *  3130 - 318f (hangul compatibility jamo)
  * ja
  *  3040 - 309f (hiragana)
  *  30a0 - 30ff (katakana)
  * zh-CN
  * zh-TW
  *
  * CJK
  *  3100 - 312f (bopomofo)
  *  31a0 - 31bf (bopomofo extended)
  *  3000 - 303f (CJK Symbols and Punctuation)
  *  2e80 - 2eff (CJK radicals supplement)
  *  2f00 - 2fdf (Kangxi Radicals)
  *  2ff0 - 2fff (Ideographic Description Characters)
  *  3190 - 319f (kanbun)
  *  3200 - 32ff (Enclosed CJK letters and Months)
  *  3300 - 33ff (CJK compatibility)
  *  3400 - 4dbf (CJK Unified Ideographs Extension A)
  *  4e00 - 9faf (CJK Unified Ideographs)
  *  f900 - fa5f (CJK Compatibility Ideographs)
  *  fe30 - fe4f (CJK compatibility Forms)
  *  ff00 - ffef (halfwidth and fullwidth forms)
  *
  * Armenian
  *  0530 - 058f
  * Syriac
  *  0700 - 074f
  * Thaana
  *  0780 - 07bf
  * Devanagari
  *  0900 - 097f
  * Bengali
  *  0980 - 09ff
  * Gurmukhi
  *  0a00 - 0a7f
  * Gujarati
  *  0a80 - 0aff
  * Oriya
  *  0b00 - 0b7f
  * Tamil
  *  0b80 - 0bff
  * Telugu
  *  0c00 - 0c7f
  * Kannada
  *  0c80 - 0cff
  * Malayalam
  *  0d00 - 0d7f
  * Sinhala
  *  0d80 - 0def
  * Lao
  *  0e80 - 0eff
  * Tibetan
  *  0f00 - 0fbf
  * Myanmar
  *  1000 - 109f
  * Georgian
  *  10a0 - 10ff
  * Ethiopic
  *  1200 - 137f
  * Cherokee
  *  13a0 - 13ff
  * Canadian Aboriginal Syllabics
  *  1400 - 167f
  * Ogham
  *  1680 - 169f
  * Runic
  *  16a0 - 16ff
  * Khmer
  *  1780 - 17ff
  * Mongolian
  *  1800 - 18af
  * Misc - superscripts and subscripts
  *  2070 - 209f
  * Misc - Combining Diacritical Marks for Symbols
  *  20d0 - 20ff
  * Misc - Arrows
  *  2190 - 21ff
  * Misc - Mathematical Operators
  *  2200 - 22ff
  * Misc - Miscellaneous Technical
  *  2300 - 23ff
  * Misc - Control picture
  *  2400 - 243f
  * Misc - Optical character recognition
  *  2440 - 245f
  * Misc - Enclose Alphanumerics
  *  2460 - 24ff
  * Misc - Box Drawing
  *  2500 - 257f
  * Misc - Block Elements
  *  2580 - 259f
  * Misc - Geometric Shapes
  *  25a0 - 25ff
  * Misc - Miscellaneous Symbols
  *  2600 - 267f
  * Misc - Dingbats
  *  2700 - 27bf
  * Misc - Braille Patterns
  *  2800 - 28ff
  * Yi Syllables
  *  a000 - a48f
  * Yi radicals
  *  a490 - a4cf
  * Alphabetic Presentation Forms
  *  fb00 - fb4f
  * Misc - Combining half Marks
  *  fe20 - fe2f
  * Misc - small form variants
  *  fe50 - fe6f
  * Misc - Specials
  *  fff0 - ffff
  *********************************************************************/

 // Dimensions of gUnicodeSubrangeTable: 9 sub-tables, each with one entry
 // per value of the hex digit being examined (16 entries).
 static const unsigned cNumSubTables = 9;
 static const unsigned cSubTableSize = 16;

 // Multi-level lookup table consulted by findCharUnicodeRange().
 // Sub-table 0 is indexed by the top hex digit of the code point. Each entry
 // is one of:
 //   - a value below cRangeTableBase: the final range for that span;
 //   - cRangeTableBase+N: continue the lookup in sub-table N, indexed by the
 //     next lower hex digit of the code point;
 //   - cRangeTertiaryTable: continue the lookup in gUnicodeTertiaryRangeTable.
 // The order of the sub-tables and of the entries inside them is significant.
 static const unsigned char gUnicodeSubrangeTable[cNumSubTables][cSubTableSize] =
 {
   { // [0] table for X--- (indexed by the top hex digit)
     cRangeTableBase+1,  //u0xxx
     cRangeTableBase+2,  //u1xxx
     cRangeTableBase+3,  //u2xxx
     cRangeSetCJK,       //u3xxx
     cRangeSetCJK,       //u4xxx
     cRangeSetCJK,       //u5xxx
     cRangeSetCJK,       //u6xxx
     cRangeSetCJK,       //u7xxx
     cRangeSetCJK,       //u8xxx
     cRangeSetCJK,       //u9xxx
     cRangeTableBase+4,  //uaxxx
     cRangeKorean,       //ubxxx
     cRangeKorean,       //ucxxx
     cRangeTableBase+5,  //udxxx
     cRangePrivate,      //uexxx
     cRangeTableBase+6   //ufxxx
   },
   { // [1] table for 0X--
     cRangeSetLatin,          //u00xx
     cRangeSetLatin,          //u01xx
     cRangeSetLatin,          //u02xx
     cRangeGreek,             //u03xx     XXX 0300-036f is in fact cRangeCombiningDiacriticalMarks
     cRangeCyrillic,          //u04xx
     cRangeTableBase+7,       //u05xx, includes Cyrillic supplement, Hebrew, and Armenian
     cRangeArabic,            //u06xx
     cRangeTertiaryTable,     //u07xx
     cRangeUnassigned,        //u08xx
     cRangeTertiaryTable,     //u09xx
     cRangeTertiaryTable,     //u0axx
     cRangeTertiaryTable,     //u0bxx
     cRangeTertiaryTable,     //u0cxx
     cRangeTertiaryTable,     //u0dxx
     cRangeTertiaryTable,     //u0exx
     cRangeTibetan,           //u0fxx
   },
   { // [2] table for 1X--
     cRangeTertiaryTable,     //u10xx
     cRangeKorean,            //u11xx
     cRangeEthiopic,          //u12xx
     cRangeTertiaryTable,     //u13xx
     cRangeCanadian,          //u14xx
     cRangeCanadian,          //u15xx
     cRangeTertiaryTable,     //u16xx
     cRangeKhmer,             //u17xx
     cRangeMongolian,         //u18xx
     cRangeUnassigned,        //u19xx
     cRangeUnassigned,        //u1axx
     cRangeUnassigned,        //u1bxx
     cRangeUnassigned,        //u1cxx
     cRangeUnassigned,        //u1dxx
     cRangeSetLatin,          //u1exx
     cRangeGreek,             //u1fxx
   },
   { // [3] table for 2X--
     cRangeSetLatin,          //u20xx
     cRangeSetLatin,          //u21xx
     cRangeMathOperators,     //u22xx
     cRangeMiscTechnical,     //u23xx
     cRangeControlOpticalEnclose, //u24xx
     cRangeBoxBlockGeometrics, //u25xx
     cRangeMiscSymbols,       //u26xx
     cRangeDingbats,          //u27xx
     cRangeBraillePattern,    //u28xx
     cRangeUnassigned,        //u29xx
     cRangeUnassigned,        //u2axx
     cRangeUnassigned,        //u2bxx
     cRangeUnassigned,        //u2cxx
     cRangeUnassigned,        //u2dxx
     cRangeSetCJK,            //u2exx
     cRangeSetCJK,            //u2fxx
   },
   {  // [4] table for aX--
     cRangeYi,                //ua0xx
     cRangeYi,                //ua1xx
     cRangeYi,                //ua2xx
     cRangeYi,                //ua3xx
     cRangeYi,                //ua4xx
     cRangeUnassigned,        //ua5xx
     cRangeUnassigned,        //ua6xx
     cRangeUnassigned,        //ua7xx
     cRangeUnassigned,        //ua8xx
     cRangeUnassigned,        //ua9xx
     cRangeUnassigned,        //uaaxx
     cRangeUnassigned,        //uabxx
     cRangeKorean,            //uacxx
     cRangeKorean,            //uadxx
     cRangeKorean,            //uaexx
     cRangeKorean,            //uafxx
   },
   {  // [5] table for dX--
     cRangeKorean,            //ud0xx
     cRangeKorean,            //ud1xx
     cRangeKorean,            //ud2xx
     cRangeKorean,            //ud3xx
     cRangeKorean,            //ud4xx
     cRangeKorean,            //ud5xx
     cRangeKorean,            //ud6xx
     cRangeKorean,            //ud7xx
     cRangeSurrogate,         //ud8xx
     cRangeSurrogate,         //ud9xx
     cRangeSurrogate,         //udaxx
     cRangeSurrogate,         //udbxx
     cRangeSurrogate,         //udcxx
     cRangeSurrogate,         //uddxx
     cRangeSurrogate,         //udexx
     cRangeSurrogate,         //udfxx
   },
   { // [6] table for fX--
     cRangePrivate,           //uf0xx
     cRangePrivate,           //uf1xx
     cRangePrivate,           //uf2xx
     cRangePrivate,           //uf3xx
     cRangePrivate,           //uf4xx
     cRangePrivate,           //uf5xx
     cRangePrivate,           //uf6xx
     cRangePrivate,           //uf7xx
     cRangePrivate,           //uf8xx
     cRangeSetCJK,            //uf9xx
     cRangeSetCJK,            //ufaxx
     cRangeArabic,            //ufbxx, includes alphabetic presentation forms
     cRangeArabic,            //ufcxx
     cRangeArabic,            //ufdxx
     cRangeArabic,            //ufexx, includes Combining half marks,
                              //                CJK compatibility forms,
                              //                small form variants
     cRangeTableBase+8,       //uffxx, halfwidth and fullwidth forms, includes Specials
   },
   { // [7] table for 0x0500 - 0x05ff (indexed by the third hex digit)
     cRangeCyrillic,          //u050x
     cRangeCyrillic,          //u051x
     cRangeCyrillic,          //u052x
     cRangeArmenian,          //u053x
     cRangeArmenian,          //u054x
     cRangeArmenian,          //u055x
     cRangeArmenian,          //u056x
     cRangeArmenian,          //u057x
     cRangeArmenian,          //u058x
     cRangeHebrew,            //u059x
     cRangeHebrew,            //u05ax
     cRangeHebrew,            //u05bx
     cRangeHebrew,            //u05cx
     cRangeHebrew,            //u05dx
     cRangeHebrew,            //u05ex
     cRangeHebrew,            //u05fx
   },
   { // [8] table for 0xff00 - 0xffff (indexed by the third hex digit)
     cRangeSetCJK,            //uff0x, fullwidth latin
     cRangeSetCJK,            //uff1x, fullwidth latin
     cRangeSetCJK,            //uff2x, fullwidth latin
     cRangeSetCJK,            //uff3x, fullwidth latin
     cRangeSetCJK,            //uff4x, fullwidth latin
     cRangeSetCJK,            //uff5x, fullwidth latin
     cRangeSetCJK,            //uff6x, halfwidth katakana
     cRangeSetCJK,            //uff7x, halfwidth katakana
     cRangeSetCJK,            //uff8x, halfwidth katakana
     cRangeSetCJK,            //uff9x, halfwidth katakana
     cRangeSetCJK,            //uffax, halfwidth hangul jamo
     cRangeSetCJK,            //uffbx, halfwidth hangul jamo
     cRangeSetCJK,            //uffcx, halfwidth hangul jamo
     cRangeSetCJK,            //uffdx, halfwidth hangul jamo
     cRangeSetCJK,            //uffex, fullwidth symbols
     cRangeSpecials,          //ufffx, Specials
   },
 };

 // Most scripts between U+0700 and U+16FF are assigned a chunk of 128 (0x80)
 // code points, so the number of entries in the tertiary range table for that
 // span is (0x1700 - 0x0700) / 128 = 32.
 // Exceptions: Ethiopic, Tibetan, Hangul Jamo and Canadian aboriginal
 // syllabics take multiple chunks, and Ogham and Runic share a single chunk.
 static const unsigned cTertiaryTableSize = ((0x1700 - 0x0700) / 0x80);

 // One entry per 128-code-point chunk, indexed by (ch - 0x0700) >> 7 in
 // findCharUnicodeRange(). Entries marked "placeholder" cover chunks whose
 // range is already decided by the secondary table; they only keep the
 // remaining entries at the right index.
 static const unsigned char gUnicodeTertiaryRangeTable[cTertiaryTableSize] =
 { //table for 0x0700 - 0x16ff
     cRangeSyriac,            //u070x
     cRangeThaana,            //u078x
     cRangeUnassigned,        //u080x  placeholder (resolved in the secondary table)
     cRangeUnassigned,        //u088x  placeholder (resolved in the secondary table)
     cRangeDevanagari,        //u090x
     cRangeBengali,           //u098x
     cRangeGurmukhi,          //u0a0x
     cRangeGujarati,          //u0a8x
     cRangeOriya,             //u0b0x
     cRangeTamil,             //u0b8x
     cRangeTelugu,            //u0c0x
     cRangeKannada,           //u0c8x
     cRangeMalayalam,         //u0d0x
     cRangeSinhala,           //u0d8x
     cRangeThai,              //u0e0x
     cRangeLao,               //u0e8x
     cRangeTibetan,           //u0f0x  placeholder (resolved in the secondary table)
     cRangeTibetan,           //u0f8x  placeholder (resolved in the secondary table)
     cRangeMyanmar,           //u100x
     cRangeGeorgian,          //u108x
     cRangeKorean,            //u110x  placeholder (resolved in the secondary table)
     cRangeKorean,            //u118x  placeholder (resolved in the secondary table)
     cRangeEthiopic,          //u120x  placeholder (resolved in the secondary table)
     cRangeEthiopic,          //u128x  placeholder (resolved in the secondary table)
     cRangeEthiopic,          //u130x
     cRangeCherokee,          //u138x
     cRangeCanadian,          //u140x  placeholder (resolved in the secondary table)
     cRangeCanadian,          //u148x  placeholder (resolved in the secondary table)
     cRangeCanadian,          //u150x  placeholder (resolved in the secondary table)
     cRangeCanadian,          //u158x  placeholder (resolved in the secondary table)
     cRangeCanadian,          //u160x
     cRangeOghamRunic,        //u168x  this contains two scripts, Ogham & Runic
 };

 // Returns the unicode range value (one of the cRange* constants) for a
 // code point below U+FFFF, using a table lookup of up to three levels.
 //
 // A two level index is almost enough for locating a range, with the
 // exception of u03xx and u05xx. Since we don't really care about the range
 // for combining diacritical marks in our font application, they are
 // not discriminated further. Future adoption of this method for other uses
 // should be aware of this limitation. The implementation can be extended if
 // there is such a need.
 // For Indic, Southeast Asian scripts and some other scripts between
 // U+0700 and U+16FF, the lookup is extended to a third level.
 //
 // Code points outside the handled span yield 0. Note that 0 is also a valid
 // range value (the comment on the language group table maps index 0 to
 // cRangeCyrillic), so callers cannot distinguish the two cases.
 unsigned int findCharUnicodeRange(UChar32 ch)
 {
     // UChar32 is a signed type in ICU, so a negative value must be rejected
     // here; otherwise it would index before the start of the first table.
     // NOTE(review): U+FFFF itself is rejected by the >= comparison even
     // though the tables have an entry for it; kept as-is to preserve
     // existing behaviour.
     if (ch < 0 || ch >= 0xFFFF)
         return 0;

     // First level: indexed by the top hex digit of the code point.
     unsigned int range = gUnicodeSubrangeTable[0][ch >> 12];
     if (range < cRangeTableBase)
         return range; // a final range value, no further lookup needed

     // Second level: the entry named a sub-table; index it by the second hex digit.
     range = gUnicodeSubrangeTable[range - cRangeTableBase][(ch & 0x0f00) >> 8];
     if (range < cRangeTableBase)
         return range;

     // Third level within the same table set: index by the third hex digit.
     if (range < cRangeTertiaryTable)
         return gUnicodeSubrangeTable[range - cRangeTableBase][(ch & 0x00f0) >> 4];

     // Yet another table to look at : U+0700 - U+16FF : 128 code point blocks
     return gUnicodeTertiaryRangeTable[(ch - 0x0700) >> 7];
 }

 // Looks up the language group name for a unicode range value.
 // Only values below cRangeSpecificItemNum have an entry in
 // gUnicodeRangeToLangGroupTable; any other value yields a null pointer.
 const char* langGroupFromUnicodeRange(unsigned char unicodeRange)
 {
     const bool hasLangGroup = unicodeRange < cRangeSpecificItemNum;
     return hasLangGroup ? gUnicodeRangeToLangGroupTable[unicodeRange] : 0;
 }

 }
	/*
	* Copyright (C) 2007 Apple Computer, Inc.
	*
	* Portions are Copyright (C) 1998 Netscape Communications Corporation.
	*
	* This library is free software; you can redistribute it and/or
	* modify it under the terms of the GNU Lesser General Public
	* License as published by the Free Software Foundation; either
	* version 2.1 of the License, or (at your option) any later version.
	*
	* This library is distributed in the hope that it will be useful,
	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	* Lesser General Public License for more details.
	*
	* You should have received a copy of the GNU Lesser General Public
	* License along with this library; if not, write to the Free Software
	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
	*
	* Alternatively, the contents of this file may be used under the terms
	* of either the Mozilla Public License Version 1.1, found at
	* http://www.mozilla.org/MPL/ (the "MPL") or the GNU General Public
	* License Version 2.0, found at http://www.fsf.org/copyleft/gpl.html
	* (the "GPL"), in which case the provisions of the MPL or the GPL are
	* applicable instead of those above. If you wish to allow use of your
	* version of this file only under the terms of one of those two
	* licenses (the MPL or the GPL) and not to allow others to use your
	* version of this file under the LGPL, indicate your decision by
	* deletingthe provisions above and replace them with the notice and
	* other provisions required by the MPL or the GPL, as the case may be.
	* If you do not delete the provisions above, a recipient may use your
	* version of this file under any of the LGPL, the MPL or the GPL.
	*/

	#include "config.h"
	#include "UnicodeRange.h"

	namespace WebCore {

	// Language group name for each script-specific unicode range.
	// The table is indexed directly by a unicode range value, so the order of
	// the entries must match the numbering of the range constants,
	// e.g. gUnicodeRangeToLangGroupTable[cRangeCyrillic] is "x-cyrillic".
	// Both the strings and the pointer slots are const: the only use of the
	// table visible in this file is a read, so nothing needs to write to it.
	static const char* const gUnicodeRangeToLangGroupTable[] =
	{
	    "x-cyrillic",
	    "el",
	    "tr",
	    "he",
	    "ar",
	    "x-baltic",
	    "th",
	    "ko",
	    "ja",
	    "zh-CN",
	    "zh-TW",
	    "x-devanagari",
	    "x-tamil",
	    "x-armn",
	    "x-beng",
	    "x-cans",
	    "x-ethi",
	    "x-geor",
	    "x-gujr",
	    "x-guru",
	    "x-khmr",
	    "x-mlym"
	};

	/**********************************************************************
	* Unicode subranges as defined in unicode 3.0
	* x-western, x-central-euro, tr, x-baltic -> latin
	* 0000 - 036f
	* 1e00 - 1eff
	* 2000 - 206f (general punctuation)
	* 20a0 - 20cf (currency symbols)
	* 2100 - 214f (letterlike symbols)
	* 2150 - 218f (Number Forms)
	* el -> greek
	* 0370 - 03ff
	* 1f00 - 1fff
	* x-cyrillic -> cyrillic
	* 0400 - 04ff
	* he -> hebrew
	* 0590 - 05ff
	* ar -> arabic
	* 0600 - 06ff
	* fb50 - fdff (arabic presentation forms)
	* fe70 - feff (arabic presentation forms b)
	* th - thai
	* 0e00 - 0e7f
	* ko -> korean
	* ac00 - d7af (hangul Syllables)
	* 1100 - 11ff (jamo)
	* 3130 - 318f (hangul compatibility jamo)
	* ja
	* 3040 - 309f (hiragana)
	* 30a0 - 30ff (katakana)
	* zh-CN
	* zh-TW
	*
	* CJK
	* 3100 - 312f (bopomofo)
	* 31a0 - 31bf (bopomofo extended)
	* 3000 - 303f (CJK Symbols and Punctuation)
	* 2e80 - 2eff (CJK radicals supplement)
	* 2f00 - 2fdf (Kangxi Radicals)
	* 2ff0 - 2fff (Ideographic Description Characters)
	* 3190 - 319f (kanbun)
	* 3200 - 32ff (Enclosed CJK letters and Months)
	* 3300 - 33ff (CJK compatibility)
	* 3400 - 4dbf (CJK Unified Ideographs Extension A)
	* 4e00 - 9faf (CJK Unified Ideographs)
	* f900 - fa5f (CJK Compatibility Ideographs)
	* fe30 - fe4f (CJK compatibility Forms)
	* ff00 - ffef (halfwidth and fullwidth forms)
	*
	* Armenian
	* 0530 - 058f
	* Syriac
	* 0700 - 074f
	* Thaana
	* 0780 - 07bf
	* Devanagari
	* 0900 - 097f
	* Bengali
	* 0980 - 09ff
	* Gurmukhi
	* 0a00 - 0a7f
	* Gujarati
	* 0a80 - 0aff
	* Oriya
	* 0b00 - 0b7f
	* Tamil
	* 0b80 - 0bff
	* Telugu
	* 0c00 - 0c7f
	* Kannada
	* 0c80 - 0cff
	* Malayalam
	* 0d00 - 0d7f
	* Sinhala
	* 0d80 - 0def
	* Lao
	* 0e80 - 0eff
	* Tibetan
	* 0f00 - 0fbf
	* Myanmar
	* 1000 - 109f
	* Georgian
	* 10a0 - 10ff
	* Ethiopic
	* 1200 - 137f
	* Cherokee
	* 13a0 - 13ff
	* Canadian Aboriginal Syllabics
	* 1400 - 167f
	* Ogham
	* 1680 - 169f
	* Runic
	* 16a0 - 16ff
	* Khmer
	* 1780 - 17ff
	* Mongolian
	* 1800 - 18af
	* Misc - superscripts and subscripts
	* 2070 - 209f
	* Misc - Combining Diacritical Marks for Symbols
	* 20d0 - 20ff
	* Misc - Arrows
	* 2190 - 21ff
	* Misc - Mathematical Operators
	* 2200 - 22ff
	* Misc - Miscellaneous Technical
	* 2300 - 23ff
	* Misc - Control picture
	* 2400 - 243f
	* Misc - Optical character recognition
	* 2440 - 245f
	* Misc - Enclose Alphanumerics
	* 2460 - 24ff
	* Misc - Box Drawing
	* 2500 - 257f
	* Misc - Block Elements
	* 2580 - 259f
	* Misc - Geometric Shapes
	* 25a0 - 25ff
	* Misc - Miscellaneous Symbols
	* 2600 - 267f
	* Misc - Dingbats
	* 2700 - 27bf
	* Misc - Braille Patterns
	* 2800 - 28ff
	* Yi Syllables
	* a000 - a48f
	* Yi radicals
	* a490 - a4cf
	* Alphabetic Presentation Forms
	* fb00 - fb4f
	* Misc - Combining half Marks
	* fe20 - fe2f
	* Misc - small form variants
	* fe50 - fe6f
	* Misc - Specials
	* fff0 - ffff
	*********************************************************************/

	// Dimensions of gUnicodeSubrangeTable: 9 sub-tables, each with one entry
	// per value of the hex digit being examined (16 entries).
	static const unsigned cNumSubTables = 9;
	static const unsigned cSubTableSize = 16;

	// Multi-level lookup table consulted by findCharUnicodeRange().
	// Sub-table 0 is indexed by the top hex digit of the code point. Each entry
	// is one of:
	// - a value below cRangeTableBase: the final range for that span;
	// - cRangeTableBase+N: continue the lookup in sub-table N, indexed by the
	//   next lower hex digit of the code point;
	// - cRangeTertiaryTable: continue the lookup in gUnicodeTertiaryRangeTable.
	// The order of the sub-tables and of the entries inside them is significant.
	static const unsigned char gUnicodeSubrangeTable[cNumSubTables][cSubTableSize] =
	{
	{ // [0] table for X--- (indexed by the top hex digit)
	cRangeTableBase+1, //u0xxx
	cRangeTableBase+2, //u1xxx
	cRangeTableBase+3, //u2xxx
	cRangeSetCJK, //u3xxx
	cRangeSetCJK, //u4xxx
	cRangeSetCJK, //u5xxx
	cRangeSetCJK, //u6xxx
	cRangeSetCJK, //u7xxx
	cRangeSetCJK, //u8xxx
	cRangeSetCJK, //u9xxx
	cRangeTableBase+4, //uaxxx
	cRangeKorean, //ubxxx
	cRangeKorean, //ucxxx
	cRangeTableBase+5, //udxxx
	cRangePrivate, //uexxx
	cRangeTableBase+6 //ufxxx
	},
	{ // [1] table for 0X--
	cRangeSetLatin, //u00xx
	cRangeSetLatin, //u01xx
	cRangeSetLatin, //u02xx
	cRangeGreek, //u03xx XXX 0300-036f is in fact cRangeCombiningDiacriticalMarks
	cRangeCyrillic, //u04xx
	cRangeTableBase+7, //u05xx, includes Cyrillic supplement, Hebrew, and Armenian
	cRangeArabic, //u06xx
	cRangeTertiaryTable, //u07xx
	cRangeUnassigned, //u08xx
	cRangeTertiaryTable, //u09xx
	cRangeTertiaryTable, //u0axx
	cRangeTertiaryTable, //u0bxx
	cRangeTertiaryTable, //u0cxx
	cRangeTertiaryTable, //u0dxx
	cRangeTertiaryTable, //u0exx
	cRangeTibetan, //u0fxx
	},
	{ // [2] table for 1X--
	cRangeTertiaryTable, //u10xx
	cRangeKorean, //u11xx
	cRangeEthiopic, //u12xx
	cRangeTertiaryTable, //u13xx
	cRangeCanadian, //u14xx
	cRangeCanadian, //u15xx
	cRangeTertiaryTable, //u16xx
	cRangeKhmer, //u17xx
	cRangeMongolian, //u18xx
	cRangeUnassigned, //u19xx
	cRangeUnassigned, //u1axx
	cRangeUnassigned, //u1bxx
	cRangeUnassigned, //u1cxx
	cRangeUnassigned, //u1dxx
	cRangeSetLatin, //u1exx
	cRangeGreek, //u1fxx
	},
	{ // [3] table for 2X--
	cRangeSetLatin, //u20xx
	cRangeSetLatin, //u21xx
	cRangeMathOperators, //u22xx
	cRangeMiscTechnical, //u23xx
	cRangeControlOpticalEnclose, //u24xx
	cRangeBoxBlockGeometrics, //u25xx
	cRangeMiscSymbols, //u26xx
	cRangeDingbats, //u27xx
	cRangeBraillePattern, //u28xx
	cRangeUnassigned, //u29xx
	cRangeUnassigned, //u2axx
	cRangeUnassigned, //u2bxx
	cRangeUnassigned, //u2cxx
	cRangeUnassigned, //u2dxx
	cRangeSetCJK, //u2exx
	cRangeSetCJK, //u2fxx
	},
	{ // [4] table for aX--
	cRangeYi, //ua0xx
	cRangeYi, //ua1xx
	cRangeYi, //ua2xx
	cRangeYi, //ua3xx
	cRangeYi, //ua4xx
	cRangeUnassigned, //ua5xx
	cRangeUnassigned, //ua6xx
	cRangeUnassigned, //ua7xx
	cRangeUnassigned, //ua8xx
	cRangeUnassigned, //ua9xx
	cRangeUnassigned, //uaaxx
	cRangeUnassigned, //uabxx
	cRangeKorean, //uacxx
	cRangeKorean, //uadxx
	cRangeKorean, //uaexx
	cRangeKorean, //uafxx
	},
	{ // [5] table for dX--
	cRangeKorean, //ud0xx
	cRangeKorean, //ud1xx
	cRangeKorean, //ud2xx
	cRangeKorean, //ud3xx
	cRangeKorean, //ud4xx
	cRangeKorean, //ud5xx
	cRangeKorean, //ud6xx
	cRangeKorean, //ud7xx
	cRangeSurrogate, //ud8xx
	cRangeSurrogate, //ud9xx
	cRangeSurrogate, //udaxx
	cRangeSurrogate, //udbxx
	cRangeSurrogate, //udcxx
	cRangeSurrogate, //uddxx
	cRangeSurrogate, //udexx
	cRangeSurrogate, //udfxx
	},
	{ // [6] table for fX--
	cRangePrivate, //uf0xx
	cRangePrivate, //uf1xx
	cRangePrivate, //uf2xx
	cRangePrivate, //uf3xx
	cRangePrivate, //uf4xx
	cRangePrivate, //uf5xx
	cRangePrivate, //uf6xx
	cRangePrivate, //uf7xx
	cRangePrivate, //uf8xx
	cRangeSetCJK, //uf9xx
	cRangeSetCJK, //ufaxx
	cRangeArabic, //ufbxx, includes alphabetic presentation forms
	cRangeArabic, //ufcxx
	cRangeArabic, //ufdxx
	cRangeArabic, //ufexx, includes Combining half marks,
	// CJK compatibility forms,
	// small form variants
	cRangeTableBase+8, //uffxx, halfwidth and fullwidth forms, includes Specials
	},
	{ // [7] table for 0x0500 - 0x05ff (indexed by the third hex digit)
	cRangeCyrillic, //u050x
	cRangeCyrillic, //u051x
	cRangeCyrillic, //u052x
	cRangeArmenian, //u053x
	cRangeArmenian, //u054x
	cRangeArmenian, //u055x
	cRangeArmenian, //u056x
	cRangeArmenian, //u057x
	cRangeArmenian, //u058x
	cRangeHebrew, //u059x
	cRangeHebrew, //u05ax
	cRangeHebrew, //u05bx
	cRangeHebrew, //u05cx
	cRangeHebrew, //u05dx
	cRangeHebrew, //u05ex
	cRangeHebrew, //u05fx
	},
	{ // [8] table for 0xff00 - 0xffff (indexed by the third hex digit)
	cRangeSetCJK, //uff0x, fullwidth latin
	cRangeSetCJK, //uff1x, fullwidth latin
	cRangeSetCJK, //uff2x, fullwidth latin
	cRangeSetCJK, //uff3x, fullwidth latin
	cRangeSetCJK, //uff4x, fullwidth latin
	cRangeSetCJK, //uff5x, fullwidth latin
	cRangeSetCJK, //uff6x, halfwidth katakana
	cRangeSetCJK, //uff7x, halfwidth katakana
	cRangeSetCJK, //uff8x, halfwidth katakana
	cRangeSetCJK, //uff9x, halfwidth katakana
	cRangeSetCJK, //uffax, halfwidth hangul jamo
	cRangeSetCJK, //uffbx, halfwidth hangul jamo
	cRangeSetCJK, //uffcx, halfwidth hangul jamo
	cRangeSetCJK, //uffdx, halfwidth hangul jamo
	cRangeSetCJK, //uffex, fullwidth symbols
	cRangeSpecials, //ufffx, Specials
	},
	};

	// Most scripts between U+0700 and U+16FF are assigned a chunk of 128 (0x80)
	// code points, so the number of entries in the tertiary range table for that
	// span is (0x1700 - 0x0700) / 128 = 32.
	// Exceptions: Ethiopic, Tibetan, Hangul Jamo and Canadian aboriginal
	// syllabics take multiple chunks, and Ogham and Runic share a single chunk.
	static const unsigned cTertiaryTableSize = ((0x1700 - 0x0700) / 0x80);

	// One entry per 128-code-point chunk, indexed by (ch - 0x0700) >> 7 in
	// findCharUnicodeRange(). Entries marked "placeholder" cover chunks whose
	// range is already decided by the secondary table; they only keep the
	// remaining entries at the right index.
	static const unsigned char gUnicodeTertiaryRangeTable[cTertiaryTableSize] =
	{ //table for 0x0700 - 0x16ff
	cRangeSyriac, //u070x
	cRangeThaana, //u078x
	cRangeUnassigned, //u080x placeholder (resolved in the secondary table)
	cRangeUnassigned, //u088x placeholder (resolved in the secondary table)
	cRangeDevanagari, //u090x
	cRangeBengali, //u098x
	cRangeGurmukhi, //u0a0x
	cRangeGujarati, //u0a8x
	cRangeOriya, //u0b0x
	cRangeTamil, //u0b8x
	cRangeTelugu, //u0c0x
	cRangeKannada, //u0c8x
	cRangeMalayalam, //u0d0x
	cRangeSinhala, //u0d8x
	cRangeThai, //u0e0x
	cRangeLao, //u0e8x
	cRangeTibetan, //u0f0x placeholder (resolved in the secondary table)
	cRangeTibetan, //u0f8x placeholder (resolved in the secondary table)
	cRangeMyanmar, //u100x
	cRangeGeorgian, //u108x
	cRangeKorean, //u110x placeholder (resolved in the secondary table)
	cRangeKorean, //u118x placeholder (resolved in the secondary table)
	cRangeEthiopic, //u120x placeholder (resolved in the secondary table)
	cRangeEthiopic, //u128x placeholder (resolved in the secondary table)
	cRangeEthiopic, //u130x
	cRangeCherokee, //u138x
	cRangeCanadian, //u140x placeholder (resolved in the secondary table)
	cRangeCanadian, //u148x placeholder (resolved in the secondary table)
	cRangeCanadian, //u150x placeholder (resolved in the secondary table)
	cRangeCanadian, //u158x placeholder (resolved in the secondary table)
	cRangeCanadian, //u160x
	cRangeOghamRunic, //u168x this contains two scripts, Ogham & Runic
	};

	// Returns the unicode range value (one of the cRange* constants) for a
	// code point below U+FFFF, using a table lookup of up to three levels.
	//
	// A two level index is almost enough for locating a range, with the
	// exception of u03xx and u05xx. Since we don't really care about the range
	// for combining diacritical marks in our font application, they are
	// not discriminated further. Future adoption of this method for other uses
	// should be aware of this limitation. The implementation can be extended if
	// there is such a need.
	// For Indic, Southeast Asian scripts and some other scripts between
	// U+0700 and U+16FF, the lookup is extended to a third level.
	//
	// Code points outside the handled span yield 0. Note that 0 is also a valid
	// range value (the comment on the language group table maps index 0 to
	// cRangeCyrillic), so callers cannot distinguish the two cases.
	unsigned int findCharUnicodeRange(UChar32 ch)
	{
	    // UChar32 is a signed type in ICU, so a negative value must be rejected
	    // here; otherwise it would index before the start of the first table.
	    // NOTE(review): U+FFFF itself is rejected by the >= comparison even
	    // though the tables have an entry for it; kept as-is to preserve
	    // existing behaviour.
	    if (ch < 0 || ch >= 0xFFFF)
	        return 0;

	    // First level: indexed by the top hex digit of the code point.
	    unsigned int range = gUnicodeSubrangeTable[0][ch >> 12];
	    if (range < cRangeTableBase)
	        return range; // a final range value, no further lookup needed

	    // Second level: the entry named a sub-table; index it by the second hex digit.
	    range = gUnicodeSubrangeTable[range - cRangeTableBase][(ch & 0x0f00) >> 8];
	    if (range < cRangeTableBase)
	        return range;

	    // Third level within the same table set: index by the third hex digit.
	    if (range < cRangeTertiaryTable)
	        return gUnicodeSubrangeTable[range - cRangeTableBase][(ch & 0x00f0) >> 4];

	    // Yet another table to look at : U+0700 - U+16FF : 128 code point blocks
	    return gUnicodeTertiaryRangeTable[(ch - 0x0700) >> 7];
	}

	// Looks up the language group name for a unicode range value.
	// Only values below cRangeSpecificItemNum have an entry in
	// gUnicodeRangeToLangGroupTable; any other value yields a null pointer.
	const char* langGroupFromUnicodeRange(unsigned char unicodeRange)
	{
	    const bool hasLangGroup = unicodeRange < cRangeSpecificItemNum;
	    return hasLangGroup ? gUnicodeRangeToLangGroupTable[unicodeRange] : 0;
	}

	}